Import Data¶
import requests
from pyspark.sql import SparkSession
import pyspark.pandas as ps
import matplotlib.pyplot as plt
import pprint
from pyspark.sql.functions import from_unixtime, col, mean, stddev, abs as pyspark_abs
import pandas as pd
from pyspark.sql.functions import col, count, split, size, length, when, isnan, regexp_extract, expr
ps.set_option('plotting.backend', 'matplotlib')
from pyspark.sql import Row
from pyspark.sql.types import StructType, StructField, StringType
import re
# Get (or create) the Spark session used by the rest of the notebook
spark = SparkSession.builder.appName("ReadCSVFile").getOrCreate()
# DBFS location of the cleaned CSV (this is a dbfs:/ path, not a presigned URL)
local_file_path = 'dbfs:/FileStore/files/FinalClean_100K.csv'
# Load the semicolon-delimited CSV into a pandas-on-Spark DataFrame
df = ps.read_csv(local_file_path, sep=";")
# Show the first few rows of the DataFrame
df.head()
| category | title | body | amenities | bathrooms | bedrooms | currency | fee | has_photo | pets_allowed | price | price_type | square_feet | cityname | state | latitude | longitude | source | time | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_None | pets_allowed_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | housing/rent | Excellent home with 2 bdrooms, loft, and 1 BA.... | Every room has a ceiling fan in it, along with... | Alarm,Fireplace,Refrigerator | 1.0 | 3 | USD | No | Yes | Cats,Dogs | 800.0 | Monthly | 2500.0 | Kansas City | MO | 39.0342 | -94.5429 | RentDigs.com | 2019-02-22 07:39:28 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
| 1 | housing/rent/apartment | $1,000 / Two BR - Great Deal. MUST SEE. Cat OK! | Spacious two beds apartment in historic Bowers... | AC,Cable or Satellite,Dishwasher,Garbage Dispo... | 2.0 | 2 | USD | No | Yes | Cats | 1000.0 | Monthly | 875.0 | Richmond | VA | 37.5423 | -77.4347 | RentDigs.com | 2019-02-22 09:43:14 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 |
| 2 | housing/rent/apartment | $1,017 / One BR - Great Deal. MUST SEE! | Square footage: 780 square feet, unit number: ... | AC,Dishwasher,Fireplace,Gated,Gym,Patio/Deck,P... | 1.0 | 1 | USD | No | Thumbnail | Cats,Dogs | 1017.0 | Monthly | 780.0 | Lawrenceville | GA | 33.9222 | -84.0725 | RentDigs.com | 2019-09-18 03:09:41 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 |
| 3 | housing/rent/apartment | $1,023 / Two BR - Great Deal. MUST SEE. Pet OK! | Come home and enjoy all the luxuries you Fores... | Parking,Pool,Washer Dryer | 2.0 | 2 | USD | No | Yes | Cats,Dogs | 1023.0 | Monthly | 1115.0 | Bahama | NC | 36.1599 | -78.8975 | RentDigs.com | 2019-09-17 21:51:57 | 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 4 | housing/rent/apartment | $1,025/mo \ Two BA \ Apartment - convenient lo... | Square footage: 1300 square ft, unit number: 5... | Pool | 1.5 | 2 | USD | No | Yes | Cats,Dogs | 1025.0 | Monthly | 1300.0 | Tampa | FL | 28.0395 | -82.3952 | RentDigs.com | 2019-02-22 09:26:04 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
# List every column label of the loaded DataFrame
display(df.columns)
Index(['category', 'title', 'body', 'amenities', 'bathrooms', 'bedrooms',
'currency', 'fee', 'has_photo', 'pets_allowed', 'price', 'price_type',
'square_feet', 'cityname', 'state', 'latitude', 'longitude', 'source',
'time', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
'pets_allowed_Yes'],
dtype='object')
# Dtypes treated as numeric for the correlation / VIF analysis below.
# The original list named 'int64' twice; the duplicate is removed here.
# NOTE(review): 'int16' is not listed -- possibly what the duplicate was meant to be; confirm.
included_numeric_types = ['float64', 'int64', 'float32', 'int32', 'int8']
# Keep only the columns whose dtype is one of the numeric types above
numeric_df = df.select_dtypes(include=included_numeric_types)
numeric_df.columns
Index(['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude',
'longitude', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
'pets_allowed_Yes'],
dtype='object')
# Preview the first rows of the numeric-only frame
numeric_df.head()
| bathrooms | bedrooms | price | square_feet | latitude | longitude | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_None | pets_allowed_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6056 | 2.0 | 2 | 1007.0 | 955.0 | 35.7551 | -78.7199 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
| 6057 | 1.0 | 1 | 1009.0 | 780.0 | 36.1106 | -79.7406 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 6058 | 1.0 | 2 | 1010.0 | 1075.0 | 36.8379 | -76.0939 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 6059 | 1.0 | 1 | 1010.0 | 659.0 | 39.1627 | -76.6354 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 |
| 6060 | 2.0 | 2 | 1011.0 | 1000.0 | 33.9743 | -84.2384 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
# Print the column labels that survived the numeric dtype filter
print(numeric_df.columns)
Index(['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude',
'longitude', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
'pets_allowed_Yes'],
dtype='object')
# Remove spatial outliers: keep only rows whose longitude is at or east of -130 degrees.
# NOTE(review): presumably this excludes listings outside the contiguous US -- confirm.
numeric_df = numeric_df[numeric_df['longitude'] >= -130]
# Drop the pets_allowed_None indicator column.
# NOTE(review): presumably dropped as the complement of pets_allowed_Yes -- confirm.
numeric_df = numeric_df.drop(columns='pets_allowed_None')
# Preview the filtered frame
numeric_df.head()
| bathrooms | bedrooms | price | square_feet | latitude | longitude | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 3 | 800.0 | 2500.0 | 39.0342 | -94.5429 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
| 1 | 2.0 | 2 | 1000.0 | 875.0 | 37.5423 | -77.4347 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
| 2 | 1.0 | 1 | 1017.0 | 780.0 | 33.9222 | -84.0725 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 |
| 3 | 2.0 | 2 | 1023.0 | 1115.0 | 36.1599 | -78.8975 | 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| 4 | 1.5 | 2 | 1025.0 | 1300.0 | 28.0395 | -82.3952 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
import pyspark.pandas as ps
# numeric_df is the pandas-on-Spark DataFrame built in the earlier cells
# Restrict the summary to the continuous (non-indicator) columns
columns_to_include = ['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude', 'longitude']
selected_df = numeric_df[columns_to_include]
# Generate the summary statistics (one row per statistic, one column per feature)
description = selected_df.describe()
# Render a single cell with two decimal places when it can be read as a number
def format_to_2_decimals(x):
    """Return ``x`` as a string with two decimal places, or ``x`` unchanged.

    Anything ``float()`` accepts (numbers, numeric strings, booleans) is
    converted and formatted; values for which ``float()`` raises
    ``ValueError`` or ``TypeError`` are handed back untouched.
    """
    try:
        numeric_value = float(x)
    except (ValueError, TypeError):
        # Not number-like: leave the cell exactly as it was
        return x
    return f"{numeric_value:.2f}"
# Apply the formatting function to every row except 'count'.
# DataFrame.apply defaults to axis=0 and hands the lambda one *column* at a time,
# so the original check `x.name != 'count'` compared column labels and never matched:
# the count row was formatted as well. Iterating rows (axis=1) makes `row.name` the
# statistic label ('count', 'mean', ...), which is what the check needs.
# The describe() result has only a handful of rows, so it is collected to a local
# pandas DataFrame first; that also lets one column hold both the raw counts and
# the formatted strings.
description_pd = description.to_pandas()
formatted_description = description_pd.apply(
    lambda row: row if row.name == 'count' else row.map(format_to_2_decimals),
    axis=1,
)
# Display the result
display(formatted_description)
| bathrooms | bedrooms | price | square_feet | latitude | longitude |
|---|---|---|---|---|---|
| 99517.00 | 99517.00 | 99517.00 | 99517.00 | 99517.00 | 99517.00 |
| 1.44 | 1.73 | 1525.54 | 956.05 | 36.93 | -91.49 |
| 0.55 | 0.75 | 902.06 | 387.22 | 4.56 | 15.74 |
| 0.00 | 0.00 | 100.00 | 101.00 | 24.56 | -124.23 |
| 1.00 | 1.00 | 1014.00 | 730.00 | 33.75 | -104.79 |
| 1.00 | 2.00 | 1350.00 | 900.00 | 37.21 | -84.54 |
| 2.00 | 2.00 | 1795.00 | 1115.00 | 39.96 | -77.57 |
| 9.00 | 9.00 | 52500.00 | 40000.00 | 48.85 | -68.78 |
# Summary statistics for every remaining numeric column, indicator columns included
numeric_df.describe()
| bathrooms | bedrooms | price | square_feet | latitude | longitude | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.00000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 |
| mean | 1.444628 | 1.725745 | 1525.544319 | 956.051479 | 36.934093 | -91.487287 | 3.161992 | 0.085664 | 0.441040 | 0.003658 | 0.000271 | 0.045279 | 0.192389 | 0.113930 | 0.149874 | 0.126029 | 0.159953 | 0.087171 | 0.438227 | 0.089251 | 0.111549 | 0.021042 | 0.043601 | 0.040124 | 0.375896 | 0.217923 | 0.002191 | 0.166776 | 0.262096 | 0.266879 | 0.038938 | 0.00208 | 0.159159 | 0.150316 | 0.092748 | 0.562818 | 0.949516 |
| std | 0.547771 | 0.750469 | 902.055852 | 387.219284 | 4.560699 | 15.737561 | 0.834220 | 0.279868 | 0.496514 | 0.060368 | 0.016469 | 0.207916 | 0.394179 | 0.317728 | 0.356950 | 0.331883 | 0.366564 | 0.282087 | 0.496172 | 0.285107 | 0.314812 | 0.143524 | 0.204206 | 0.196251 | 0.484356 | 0.412837 | 0.046753 | 0.372777 | 0.439777 | 0.442331 | 0.193448 | 0.04556 | 0.365826 | 0.357383 | 0.290080 | 0.496041 | 0.218942 |
| min | 0.000000 | 0.000000 | 100.000000 | 101.000000 | 24.564500 | -124.226500 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 1.000000 | 1.000000 | 1014.000000 | 730.000000 | 33.746500 | -104.791900 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 50% | 1.000000 | 2.000000 | 1350.000000 | 900.000000 | 37.213900 | -84.538200 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 |
| 75% | 2.000000 | 2.000000 | 1795.000000 | 1115.000000 | 39.955900 | -77.569900 | 4.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 |
| max | 9.000000 | 9.000000 | 52500.000000 | 40000.000000 | 48.846700 | -68.778800 | 5.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
Correlation Matrix¶
Our data may contain many highly correlated columns, so we need to perform some feature reduction to decrease multicollinearity. We start with a correlation-matrix heatmap to identify pairs of columns whose correlation exceeds 0.6 in absolute value.
We removed highly correlated and constant columns during data preprocessing, but we may need to remove more. Currently, bedrooms, bathrooms, and square footage are highly correlated with one another, which makes sense. We may not be able to untangle this easily, so we will perform a VIF analysis to see whether any columns are correlated strongly enough to require changes.
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Axis labels for the heatmap, in the same order as the DataFrame columns
numeric_column_names = numeric_df.columns.tolist()
# Correlation.corr works on a plain PySpark DataFrame with a single vector column
df_spark = numeric_df.to_spark()
vector_col = "corr_features"
# Pack every numeric column into one vector per row and keep only that vector
assembler = VectorAssembler(inputCols=df_spark.columns, outputCol=vector_col)
df_vector = assembler.transform(df_spark).select(vector_col)
# Correlation matrix of the assembled vector (Pearson is the default method)
matrix = Correlation.corr(df_vector, vector_col)
# The result is a one-row DataFrame; its single field holds the matrix
pearson_field = f"pearson({vector_col})"
first_row = matrix.collect()[0]
corr_matrix = first_row[pearson_field].toArray()
# Draw the full, annotated heatmap on a fixed [-1, 1] colour scale
plt.figure(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    xticklabels=numeric_column_names,
    yticklabels=numeric_column_names,
)
plt.title('Correlation Matrix')
plt.tight_layout()
We must remove the `has_Unknown` column to reduce ambiguity in the features, and likewise `allows_pets_unknown`. These columns were created when NaN values were filled with 'unknown'.
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
def compute_correlation_matrix(df, method='pearson', threshold=0.6):
    """Compute the correlation matrix of every column of ``df``.

    Args:
        df: pandas-on-Spark DataFrame; all of its columns are used, so they
            must all be numeric.
        method: Correlation method passed through to
            ``pyspark.ml.stat.Correlation.corr`` (e.g. 'pearson' or 'spearman').
        threshold: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A tuple ``(correlation_matrix, numeric_columns)`` where the first item
        is a 2-D NumPy array and the second is the list of column names in the
        same order as the matrix rows/columns.
    """
    numeric_columns = df.columns.tolist()
    # Convert pandas-on-Spark DataFrame to PySpark DataFrame
    df_spark = df[numeric_columns].to_spark()
    # Create a vector column
    vector_col = "features"
    assembler = VectorAssembler(inputCols=numeric_columns, outputCol=vector_col)
    # Only the assembled vector is needed downstream, so drop the raw columns
    # before caching instead of caching every column twice.
    df_vector = assembler.transform(df_spark).select(vector_col)
    # Cache the dataset
    df_vector.cache()
    try:
        # Calculate correlation matrix
        correlation = Correlation.corr(df_vector, vector_col, method)
        # The result is a one-row, one-column DataFrame holding the matrix
        correlation_matrix = correlation.collect()[0][0].toArray()
    finally:
        # Release the cache even when the correlation job fails
        df_vector.unpersist()
    return correlation_matrix, numeric_columns
def plot_correlation_heatmap(correlation_matrix, column_names, threshold=0.6, title='Correlation Matrix'):
    """Draw an annotated heatmap showing only correlations with |r| > threshold.

    Args:
        correlation_matrix: Square 2-D array of correlation coefficients.
        column_names: Labels for the rows/columns, in matrix order.
        threshold: Cells whose absolute value is at or below this are hidden.
        title: Prefix of the plot title; the threshold is appended to it.

    Returns:
        None. The figure is created on the current matplotlib state.
    """
    # Hide every cell that does not exceed the threshold in absolute value...
    hidden_cells = np.less_equal(np.abs(correlation_matrix), threshold)
    # ...but always keep the diagonal (self-correlations) visible
    np.fill_diagonal(hidden_cells, False)

    # Scale the square figure with the number of columns so labels stay legible
    side_length = len(column_names) * 0.8 + 2
    plt.figure(figsize=(side_length, side_length))

    heatmap_options = {
        "mask": hidden_cells,
        "annot": True,
        "cmap": 'coolwarm',
        "vmin": -1,
        "vmax": 1,
        "center": 0,
        "square": True,
        "linewidths": 0.5,
        "fmt": '.2f',
        "xticklabels": column_names,
        "yticklabels": column_names,
        "annot_kws": {"size": 16},   # annotation text size
        "cbar_kws": {"shrink": .8},  # colour-bar size
    }
    sns.heatmap(correlation_matrix, **heatmap_options)

    plt.title(f"{title} (|r| > {threshold})")
    plt.xticks(rotation=75)
    plt.yticks(rotation=0)
    plt.tight_layout()
    return None
# Compute the correlation matrix and matching column labels for numeric_df
# (method can be switched from 'pearson' to 'spearman' if needed)
correlation_matrix, column_names = compute_correlation_matrix(numeric_df, method='pearson')
# Plot the heatmap; cells with |r| <= threshold are hidden, so adjust the threshold here
plot_correlation_heatmap(correlation_matrix, column_names, threshold=0.6)
def plot_correlation_heatmap_WithFilter(correlation_matrix, column_names, threshold=0.6, title='Correlation Matrix'):
    """Plot a heatmap restricted to columns that have a strong correlation.

    A column is kept only if it has at least one off-diagonal correlation with
    |r| > threshold. Undefined (NaN) correlations, such as those produced by a
    constant column, are never treated as strong.

    Args:
        correlation_matrix: Square 2-D array-like of correlation coefficients.
        column_names: Labels for the rows/columns, in matrix order.
        threshold: Minimum absolute correlation (exclusive) for a cell to count.
        title: Prefix of the plot title; the threshold is appended to it.

    Returns:
        None. Prints a message and returns early when no column qualifies.
    """
    correlation_matrix = np.asarray(correlation_matrix, dtype=float)

    # `NaN > threshold` is False, so NaN cells are not counted as strong.
    # (Testing `<= threshold` instead would let NaN slip through as "strong"
    # and a single constant column would keep every column in the plot.)
    strong = np.abs(correlation_matrix) > threshold
    # Self-correlations must not qualify a column on their own
    np.fill_diagonal(strong, False)

    # Columns with at least one strong off-diagonal correlation
    columns_to_keep = strong.any(axis=0)

    # If no columns meet the criteria, return early
    if not columns_to_keep.any():
        print("No correlations above the threshold were found.")
        return None

    # Filter the correlation matrix and column names
    filtered_matrix = correlation_matrix[columns_to_keep][:, columns_to_keep]
    filtered_column_names = [
        name for name, keep in zip(column_names, columns_to_keep) if keep
    ]

    # Hide weak (and undefined) cells, but show the diagonal in the plot
    filtered_mask = ~(np.abs(filtered_matrix) > threshold)
    np.fill_diagonal(filtered_mask, False)

    # Scale the square figure with the number of remaining columns
    n_cols = len(filtered_column_names)
    fig_size = (n_cols * 0.8 + 2, n_cols * 0.8 + 2)

    # Create a heatmap
    plt.figure(figsize=fig_size)
    sns.heatmap(filtered_matrix,
                mask=filtered_mask,
                annot=True,
                cmap='coolwarm',
                vmin=-1,
                vmax=1,
                center=0,
                square=True,
                linewidths=0.5,
                fmt='.2f',
                xticklabels=filtered_column_names,
                yticklabels=filtered_column_names,
                annot_kws={"size": 16},   # annotation text size
                cbar_kws={"shrink": .8})  # colour-bar size
    plt.title(f"{title} (|r| > {threshold})")
    plt.xticks(rotation=45, ha="right")
    plt.yticks(rotation=0)
    plt.tight_layout()
    return None
# Plot only the columns that have at least one correlation above the threshold
plot_correlation_heatmap_WithFilter(correlation_matrix, column_names, threshold=0.6)
# Recompute the correlation matrix directly on the pandas-on-Spark frame (labelled result)
corr_matrix = numeric_df.corr()
# Keep only entries with |r| > 0.6; everything else becomes NaN
high_corr = corr_matrix.where(np.abs(corr_matrix) > .6)
display(high_corr)
| bathrooms | bedrooms | price | square_feet | latitude | longitude | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1.0 | 0.6785507254992077 | NaN | 0.6716269592610624 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 0.6785507254992077 | 1.0 | NaN | 0.6615681672898076 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 0.6716269592610624 | 0.6615681672898076 | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.6624100094559625 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.6624100094559625 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN |
| NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 |
Calculating VIF for Entire Dataset¶
This section looks at the entire dataset to check for very highly correlated columns. The correlation matrix already shows that some columns are correlated, but the VIF (variance inflation factor) tells us more about how each column relates to all of the others combined. As a rule of thumb, a column with a VIF above 5 is too collinear and should be removed.
# Confirm which DataFrame implementation numeric_df is before converting it below
type(numeric_df)
pyspark.pandas.frame.DataFrame
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
from pyspark.ml.evaluation import RegressionEvaluator
import pyspark.pandas as ps
from concurrent.futures import ThreadPoolExecutor, as_completed
# Ensure Spark session is created.
# NOTE(review): a session was already built earlier in this notebook, so getOrCreate()
# presumably returns that one and the new appName may not take effect -- confirm.
spark = SparkSession.builder \
.appName("VIF Calculation") \
.config("spark.sql.shuffle.partitions", "200") \
.getOrCreate()
# numeric_df is the pandas-on-Spark DataFrame prepared above;
# convert it to a plain Spark DataFrame for the pyspark.ml estimators
vector_df = numeric_df.to_spark()
# List to store (column, VIF) results
vif_results = []
# Repartition using the current partition count capped at 200
# (min() means this never increases the number of partitions)
num_partitions = vector_df.rdd.getNumPartitions()
optimal_partitions = min(num_partitions, 200)
vector_df = vector_df.repartition(optimal_partitions)
# Function to calculate VIF for a single column
def calculate_vif(target_col, df):
    """Compute the variance inflation factor (VIF) of one column.

    The target column is regressed on every other column of ``df`` (except
    'unique_id', if present) and the VIF is derived from that fit's R^2 as
    ``1 / (1 - R^2)``.

    Args:
        target_col: Name of the column whose VIF is wanted.
        df: Spark DataFrame holding the target and all predictor columns.

    Returns:
        A tuple ``(target_col, vif)``; ``vif`` is ``float('inf')`` when the
        fit is perfect (R^2 of 1 or more).
    """
    # Take the predictors from the DataFrame that was passed in; the original
    # read the module-level `vector_df` here and ignored `df`.
    excluded = (target_col, 'unique_id')
    feature_cols = [name for name in df.columns if name not in excluded]
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    assembled_df = assembler.transform(df)
    # Cached because the data is read twice below (fit, then transform/evaluate)
    train_data = assembled_df.select("features", target_col).cache()
    try:
        lr = LinearRegression(featuresCol="features", labelCol=target_col)
        lr_model = lr.fit(train_data)
        predictions = lr_model.transform(train_data)
        evaluator = RegressionEvaluator(predictionCol="prediction", labelCol=target_col, metricName="r2")
        r_sq = evaluator.evaluate(predictions)
    finally:
        # One cached frame is created per column; release each one when done
        train_data.unpersist()
    # Guard the division: a perfect fit means the column is fully explained by the others
    vif = float('inf') if r_sq >= 1 else 1 / (1 - r_sq)
    return (target_col, vif)
# Using ThreadPoolExecutor for concurrent execution
with ThreadPoolExecutor(max_workers=8) as executor:  # Adjust the number of workers as needed
    # Map every future to the column it is working on. The loop variable is
    # deliberately NOT called ``col``: at module level that would rebind the
    # ``col`` function imported from pyspark.sql.functions to a plain string.
    futures = {
        executor.submit(calculate_vif, column_name, vector_df): column_name
        for column_name in vector_df.columns
    }
    for future in as_completed(futures):
        column_name = futures[future]
        try:
            vif_results.append(future.result())
        except Exception as e:
            # Report the failing column and keep going with the others
            print(f"Error calculating VIF for {column_name}: {e}")
# Print VIF results at the end
print("\nSummary of VIF for all columns:")
# Sort the vif_results list by VIF value in descending order
sorted_vif_results = sorted(vif_results, key=lambda x: x[1], reverse=True)
# Print the sorted results
for col_name, vif_value in sorted_vif_results:
    print(f"VIF for {col_name}: {vif_value}")
Downloading artifacts: 0%| | 0/15 [00:00<?, ?it/s]
Uploading artifacts: 0%| | 0/4 [00:00<?, ?it/s]
Summary of VIF for all columns: VIF for bathrooms: 2.3624851836755787 VIF for square_feet: 2.339454459038783 VIF for bedrooms: 2.2664234690126075 VIF for has_Dishwasher: 2.1810783161091045 VIF for has_Refrigerator: 1.9591729080835734 VIF for has_Cable_or_Satellite: 1.6179120486024876 VIF for has_Gym: 1.6033882016075538 VIF for has_Unknown: 1.5758371809301963 VIF for has_Pool: 1.5703902700609371 VIF for has_photo_yes: 1.4836796573204838 VIF for has_AC: 1.4530923893716492 VIF for has_Washer_Dryer: 1.3539197127525244 VIF for has_Internet_Access: 1.3451229162250355 VIF for has_Clubhouse: 1.3295707004312385 VIF for price: 1.3242504302753955 VIF for has_Patio/Deck: 1.307685706804794 VIF for week_of_month: 1.2931466480414795 VIF for has_Parking: 1.2819032492905715 VIF for has_Garbage_Disposal: 1.2579875998447216 VIF for has_Fireplace: 1.2163191988166653 VIF for has_Playground: 1.1939382684898312 VIF for has_photo_no: 1.1842839427331735 VIF for has_Gated: 1.1659203164426049 VIF for has_Elevator: 1.1624553131603872 VIF for has_Storage: 1.1564350130457361 VIF for longitude: 1.1187435796303882 VIF for has_Tennis: 1.1164598198761602 VIF for has_Hot_Tub: 1.0935891982712334 VIF for latitude: 1.0873040328865666 VIF for has_TV: 1.0768832665888608 VIF for has_Wood_Floors: 1.0634335456520498 VIF for pets_allowed_Yes: 1.0523330027747977 VIF for has_View: 1.0257949576035088 VIF for has_Alarm: 1.01582526351512 VIF for has_Doorman: 1.0122277512867157 VIF for has_Luxury: 1.0091488084528268 VIF for has_Golf: 1.0054774736471004
Every VIF is well below the threshold of 5 (the largest, for bathrooms, is about 2.4), so we will not remove any more columns. Where columns are moderately correlated, we will take that into account in our analysis later on.
Summary Stats on dataset¶
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, mean, stddev, min as spark_min, max as spark_max
import pyspark.pandas as ps
from concurrent.futures import ThreadPoolExecutor, as_completed
import builtins

# Ensure Spark session is created
spark = (
    SparkSession.builder
    .appName("Summary Statistics Calculation")
    .config("spark.sql.shuffle.partitions", "200")
    .getOrCreate()
)
# Assuming numeric_df is already defined as a pandas-on-Spark DataFrame
# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()
# List to store summary statistics results
summary_results = []
# Cap the partition count at 200, then repartition to that count.
# ``builtins.min`` names Python's own min() explicitly. The previous
# ``__builtins__.min`` relied on a CPython implementation detail:
# ``__builtins__`` may be a dict instead of a module, and then attribute
# access fails.
num_partitions = vector_df.rdd.getNumPartitions()
optimal_partitions = builtins.min(num_partitions, 200)
vector_df = vector_df.repartition(optimal_partitions)
# Function to calculate summary statistics for a single column
def calculate_summary_stats(column_name, df):
    """Compute mean, standard deviation, minimum and maximum of one column.

    Args:
        column_name: Name of the column to summarise.
        df: Spark DataFrame that contains the column.

    Returns:
        Dict with the keys ``column``, ``mean``, ``stddev``, ``min`` and
        ``max``.
    """
    target = col(column_name)
    aggregates = [
        mean(target).alias("mean"),
        stddev(target).alias("stddev"),
        spark_min(target).alias("min"),
        spark_max(target).alias("max"),
    ]
    # All four aggregates come back together as a single Row
    stats_row = df.select(*aggregates).collect()[0]
    summary = {"column": column_name}
    for key in ("mean", "stddev", "min", "max"):
        summary[key] = stats_row[key]
    return summary
# Fan the per-column aggregations out over a thread pool
with ThreadPoolExecutor(max_workers=8) as executor:  # Adjust the number of workers as needed
    # future -> name of the column it summarises
    futures = {}
    for col_name in vector_df.columns:
        futures[executor.submit(calculate_summary_stats, col_name, vector_df)] = col_name
    # Handle results in completion order, so the order varies between runs
    for future in as_completed(futures):
        col_name = futures[future]
        try:
            summary = future.result()
        except Exception as e:
            print(f"Error calculating summary statistics for {col_name}: {e}")
        else:
            summary_results.append(summary)
            print(f"Summary statistics for {col_name}: {summary}")
# Recap of everything that was collected
print("\nSummary Statistics for all columns:")
for result in summary_results:
    print(result)
Summary statistics for bedrooms: {'column': 'bedrooms', 'mean': 1.7257453500406965, 'stddev': 0.7504689092373312, 'min': 0, 'max': 9}
Summary statistics for week_of_month: {'column': 'week_of_month', 'mean': 3.1619924234050463, 'stddev': 0.8342202772785027, 'min': 1, 'max': 5}
Summary statistics for bathrooms: {'column': 'bathrooms', 'mean': 1.4446275510716762, 'stddev': 0.5477713573635926, 'min': 0.0, 'max': 9.0}
Summary statistics for has_Tennis: {'column': 'has_Tennis', 'mean': 0.08566375594119598, 'stddev': 0.27986829746043457, 'min': 0, 'max': 1}
Summary statistics for square_feet: {'column': 'square_feet', 'mean': 956.05147864184, 'stddev': 387.2192844112672, 'min': 101.0, 'max': 40000.0}
Summary statistics for price: {'column': 'price', 'mean': 1525.5443189605796, 'stddev': 902.0558524870771, 'min': 100.0, 'max': 52500.0}
Summary statistics for longitude: {'column': 'longitude', 'mean': -91.48728659826962, 'stddev': 15.737561003856273, 'min': -124.2265, 'max': -68.7788}
Summary statistics for latitude: {'column': 'latitude', 'mean': 36.934092578152516, 'stddev': 4.560699440151283, 'min': 24.5645, 'max': 48.8467}
Summary statistics for has_Parking: {'column': 'has_Parking', 'mean': 0.44104022428328826, 'stddev': 0.4965140703692728, 'min': 0, 'max': 1}
Summary statistics for has_Alarm: {'column': 'has_Alarm', 'mean': 0.0036576665293366964, 'stddev': 0.06036824185793644, 'min': 0, 'max': 1}
Summary statistics for has_TV: {'column': 'has_TV', 'mean': 0.0452786961021735, 'stddev': 0.20791577662422658, 'min': 0, 'max': 1}
Summary statistics for has_Golf: {'column': 'has_Golf', 'mean': 0.00027131042937387584, 'stddev': 0.016469351705051993, 'min': 0, 'max': 1}
Summary statistics for has_Playground: {'column': 'has_Playground', 'mean': 0.11393028326818533, 'stddev': 0.3177281672028879, 'min': 0, 'max': 1}
Summary statistics for has_Clubhouse: {'column': 'has_Clubhouse', 'mean': 0.1923892400293417, 'stddev': 0.3941791238298043, 'min': 0, 'max': 1}
Summary statistics for has_Cable_or_Satellite: {'column': 'has_Cable_or_Satellite', 'mean': 0.12602871871137594, 'stddev': 0.3318833945576695, 'min': 0, 'max': 1}
Summary statistics for has_Refrigerator: {'column': 'has_Refrigerator', 'mean': 0.14987389089301326, 'stddev': 0.3569495595114224, 'min': 0, 'max': 1}
Summary statistics for has_Gated: {'column': 'has_Gated', 'mean': 0.08717103610438418, 'stddev': 0.28208694787488064, 'min': 0, 'max': 1}
Summary statistics for has_Pool: {'column': 'has_Pool', 'mean': 0.43822663464533695, 'stddev': 0.49617187057161083, 'min': 0, 'max': 1}
Summary statistics for has_Unknown: {'column': 'has_Unknown', 'mean': 0.15995257091753168, 'stddev': 0.36656390464268557, 'min': 0, 'max': 1}
Summary statistics for has_View: {'column': 'has_View', 'mean': 0.02104163107810726, 'stddev': 0.14352382321525303, 'min': 0, 'max': 1}
Summary statistics for has_Elevator: {'column': 'has_Elevator', 'mean': 0.04360059085382397, 'stddev': 0.20420577453855282, 'min': 0, 'max': 1}
Summary statistics for has_Wood_Floors: {'column': 'has_Wood_Floors', 'mean': 0.08925108272958389, 'stddev': 0.2851072495882814, 'min': 0, 'max': 1}
Summary statistics for has_Internet_Access: {'column': 'has_Internet_Access', 'mean': 0.11154878061034798, 'stddev': 0.3148120805039674, 'min': 0, 'max': 1}
Summary statistics for has_Hot_Tub: {'column': 'has_Hot_Tub', 'mean': 0.04012379794406986, 'stddev': 0.19625051794718615, 'min': 0, 'max': 1}
Summary statistics for has_Storage: {'column': 'has_Storage', 'mean': 0.21792256599374982, 'stddev': 0.4128365703722296, 'min': 0, 'max': 1}
Summary statistics for has_Gym: {'column': 'has_Gym', 'mean': 0.3758955756302943, 'stddev': 0.48435570528543753, 'min': 0, 'max': 1}
Summary statistics for has_Patio/Deck: {'column': 'has_Patio/Deck', 'mean': 0.2668790256941025, 'stddev': 0.44233084608754325, 'min': 0, 'max': 1}
Summary statistics for has_Washer_Dryer: {'column': 'has_Washer_Dryer', 'mean': 0.2620959233095853, 'stddev': 0.43977675440699454, 'min': 0, 'max': 1}
Summary statistics for has_Luxury: {'column': 'has_Luxury', 'mean': 0.002080046625199715, 'stddev': 0.045560299487497735, 'min': 0, 'max': 1}
Summary statistics for has_Dishwasher: {'column': 'has_Dishwasher', 'mean': 0.1667755257895636, 'stddev': 0.37277720713600654, 'min': 0, 'max': 1}
Summary statistics for has_Garbage_Disposal: {'column': 'has_Garbage_Disposal', 'mean': 0.03893807088236181, 'stddev': 0.19344837439825108, 'min': 0, 'max': 1}
Summary statistics for has_Doorman: {'column': 'has_Doorman', 'mean': 0.002190580503833516, 'stddev': 0.04675258094495654, 'min': 0, 'max': 1}
Summary statistics for has_photo_no: {'column': 'has_photo_no', 'mean': 0.09274797270818051, 'stddev': 0.2900803885426063, 'min': 0, 'max': 1}
Summary statistics for has_AC: {'column': 'has_AC', 'mean': 0.15915873669825256, 'stddev': 0.36582588483039546, 'min': 0, 'max': 1}
Summary statistics for has_photo_yes: {'column': 'has_photo_yes', 'mean': 0.5628184129344735, 'stddev': 0.4960406429938882, 'min': 0, 'max': 1}
Summary statistics for has_Fireplace: {'column': 'has_Fireplace', 'mean': 0.15031602640754846, 'stddev': 0.357382710879453, 'min': 0, 'max': 1}
Summary statistics for pets_allowed_Yes: {'column': 'pets_allowed_Yes', 'mean': 0.9495161630676165, 'stddev': 0.21894223170623736, 'min': 0, 'max': 1}
Summary Statistics for all columns:
{'column': 'bedrooms', 'mean': 1.7257453500406965, 'stddev': 0.7504689092373312, 'min': 0, 'max': 9}
{'column': 'week_of_month', 'mean': 3.1619924234050463, 'stddev': 0.8342202772785027, 'min': 1, 'max': 5}
{'column': 'bathrooms', 'mean': 1.4446275510716762, 'stddev': 0.5477713573635926, 'min': 0.0, 'max': 9.0}
{'column': 'has_Tennis', 'mean': 0.08566375594119598, 'stddev': 0.27986829746043457, 'min': 0, 'max': 1}
{'column': 'square_feet', 'mean': 956.05147864184, 'stddev': 387.2192844112672, 'min': 101.0, 'max': 40000.0}
{'column': 'price', 'mean': 1525.5443189605796, 'stddev': 902.0558524870771, 'min': 100.0, 'max': 52500.0}
{'column': 'longitude', 'mean': -91.48728659826962, 'stddev': 15.737561003856273, 'min': -124.2265, 'max': -68.7788}
{'column': 'latitude', 'mean': 36.934092578152516, 'stddev': 4.560699440151283, 'min': 24.5645, 'max': 48.8467}
{'column': 'has_Parking', 'mean': 0.44104022428328826, 'stddev': 0.4965140703692728, 'min': 0, 'max': 1}
{'column': 'has_Alarm', 'mean': 0.0036576665293366964, 'stddev': 0.06036824185793644, 'min': 0, 'max': 1}
{'column': 'has_TV', 'mean': 0.0452786961021735, 'stddev': 0.20791577662422658, 'min': 0, 'max': 1}
{'column': 'has_Golf', 'mean': 0.00027131042937387584, 'stddev': 0.016469351705051993, 'min': 0, 'max': 1}
{'column': 'has_Playground', 'mean': 0.11393028326818533, 'stddev': 0.3177281672028879, 'min': 0, 'max': 1}
{'column': 'has_Clubhouse', 'mean': 0.1923892400293417, 'stddev': 0.3941791238298043, 'min': 0, 'max': 1}
{'column': 'has_Cable_or_Satellite', 'mean': 0.12602871871137594, 'stddev': 0.3318833945576695, 'min': 0, 'max': 1}
{'column': 'has_Refrigerator', 'mean': 0.14987389089301326, 'stddev': 0.3569495595114224, 'min': 0, 'max': 1}
{'column': 'has_Gated', 'mean': 0.08717103610438418, 'stddev': 0.28208694787488064, 'min': 0, 'max': 1}
{'column': 'has_Pool', 'mean': 0.43822663464533695, 'stddev': 0.49617187057161083, 'min': 0, 'max': 1}
{'column': 'has_Unknown', 'mean': 0.15995257091753168, 'stddev': 0.36656390464268557, 'min': 0, 'max': 1}
{'column': 'has_View', 'mean': 0.02104163107810726, 'stddev': 0.14352382321525303, 'min': 0, 'max': 1}
{'column': 'has_Elevator', 'mean': 0.04360059085382397, 'stddev': 0.20420577453855282, 'min': 0, 'max': 1}
{'column': 'has_Wood_Floors', 'mean': 0.08925108272958389, 'stddev': 0.2851072495882814, 'min': 0, 'max': 1}
{'column': 'has_Internet_Access', 'mean': 0.11154878061034798, 'stddev': 0.3148120805039674, 'min': 0, 'max': 1}
{'column': 'has_Hot_Tub', 'mean': 0.04012379794406986, 'stddev': 0.19625051794718615, 'min': 0, 'max': 1}
{'column': 'has_Storage', 'mean': 0.21792256599374982, 'stddev': 0.4128365703722296, 'min': 0, 'max': 1}
{'column': 'has_Gym', 'mean': 0.3758955756302943, 'stddev': 0.48435570528543753, 'min': 0, 'max': 1}
{'column': 'has_Patio/Deck', 'mean': 0.2668790256941025, 'stddev': 0.44233084608754325, 'min': 0, 'max': 1}
{'column': 'has_Washer_Dryer', 'mean': 0.2620959233095853, 'stddev': 0.43977675440699454, 'min': 0, 'max': 1}
{'column': 'has_Luxury', 'mean': 0.002080046625199715, 'stddev': 0.045560299487497735, 'min': 0, 'max': 1}
{'column': 'has_Dishwasher', 'mean': 0.1667755257895636, 'stddev': 0.37277720713600654, 'min': 0, 'max': 1}
{'column': 'has_Garbage_Disposal', 'mean': 0.03893807088236181, 'stddev': 0.19344837439825108, 'min': 0, 'max': 1}
{'column': 'has_Doorman', 'mean': 0.002190580503833516, 'stddev': 0.04675258094495654, 'min': 0, 'max': 1}
{'column': 'has_photo_no', 'mean': 0.09274797270818051, 'stddev': 0.2900803885426063, 'min': 0, 'max': 1}
{'column': 'has_AC', 'mean': 0.15915873669825256, 'stddev': 0.36582588483039546, 'min': 0, 'max': 1}
{'column': 'has_photo_yes', 'mean': 0.5628184129344735, 'stddev': 0.4960406429938882, 'min': 0, 'max': 1}
{'column': 'has_Fireplace', 'mean': 0.15031602640754846, 'stddev': 0.357382710879453, 'min': 0, 'max': 1}
{'column': 'pets_allowed_Yes', 'mean': 0.9495161630676165, 'stddev': 0.21894223170623736, 'min': 0, 'max': 1}
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
# Create a Spark session if you haven't already
# NOTE: despite the "_df" suffix, spark_res_df holds the SparkSession returned by
# getOrCreate(), not a DataFrame; it is used below to call createDataFrame().
spark_res_df = SparkSession.builder.getOrCreate()
def convert_to_float(value):
    """Coerce a numeric value to ``float``; return anything else unchanged.

    Besides ``int`` and ``float`` this also converts ``decimal.Decimal``
    (what a Spark aggregate over a DecimalType column yields) and any other
    ``numbers.Real`` instance such as ``fractions.Fraction``, so that the
    result fits a ``DoubleType`` field.

    Args:
        value: Any object.

    Returns:
        ``float(value)`` for real-number inputs, otherwise ``value`` itself
        (for example strings or ``None``).
    """
    # Function-scope imports keep this notebook cell self-contained
    from decimal import Decimal
    from numbers import Real

    # Decimal is not registered as numbers.Real, so it is listed explicitly
    if isinstance(value, (Real, Decimal)):
        return float(value)
    return value  # Return as-is for other types (like strings)
# Normalise the collected statistics: every numeric field goes through
# convert_to_float so that integer min/max values fit the DoubleType schema
numeric_fields = ("mean", "stddev", "min", "max")
processed_summary_results = [
    {
        "column": item["column"],
        **{name: convert_to_float(item[name]) for name in numeric_fields},
    }
    for item in summary_results
]
# Schema: the column name as a string, followed by one double per statistic
schema = StructType(
    [StructField("column", StringType(), True)]
    + [StructField(name, DoubleType(), True) for name in numeric_fields]
)
# Build the Spark DataFrame and print it
summary_result_df = spark_res_df.createDataFrame(processed_summary_results, schema)
summary_result_df.show()
+--------------------+--------------------+--------------------+---------+--------+ | column| mean| stddev| min| max| +--------------------+--------------------+--------------------+---------+--------+ | bedrooms| 1.7257453500406965| 0.7504689092373312| 0.0| 9.0| | week_of_month| 3.1619924234050463| 0.8342202772785027| 1.0| 5.0| | bathrooms| 1.4446275510716762| 0.5477713573635926| 0.0| 9.0| | has_Tennis| 0.08566375594119598| 0.27986829746043457| 0.0| 1.0| | square_feet| 956.05147864184| 387.2192844112672| 101.0| 40000.0| | price| 1525.5443189605796| 902.0558524870771| 100.0| 52500.0| | longitude| -91.48728659826962| 15.737561003856273|-124.2265|-68.7788| | latitude| 36.934092578152516| 4.560699440151283| 24.5645| 48.8467| | has_Parking| 0.44104022428328826| 0.4965140703692728| 0.0| 1.0| | has_Alarm|0.003657666529336...| 0.06036824185793644| 0.0| 1.0| | has_TV| 0.0452786961021735| 0.20791577662422658| 0.0| 1.0| | has_Golf|2.713104293738758...|0.016469351705051993| 0.0| 1.0| | has_Playground| 0.11393028326818533| 0.3177281672028879| 0.0| 1.0| | has_Clubhouse| 0.1923892400293417| 0.3941791238298043| 0.0| 1.0| |has_Cable_or_Sate...| 0.12602871871137594| 0.3318833945576695| 0.0| 1.0| | has_Refrigerator| 0.14987389089301326| 0.3569495595114224| 0.0| 1.0| | has_Gated| 0.08717103610438418| 0.28208694787488064| 0.0| 1.0| | has_Pool| 0.43822663464533695| 0.49617187057161083| 0.0| 1.0| | has_Unknown| 0.15995257091753168| 0.36656390464268557| 0.0| 1.0| | has_View| 0.02104163107810726| 0.14352382321525303| 0.0| 1.0| +--------------------+--------------------+--------------------+---------+--------+ only showing top 20 rows
# Render the whole summary table through the notebook's display(); show() above printed only the first 20 rows
summary_result_df.display()
| column | mean | stddev | min | max |
|---|---|---|---|---|
| bedrooms | 1.7257453500406965 | 0.7504689092373312 | 0.0 | 9.0 |
| week_of_month | 3.1619924234050463 | 0.8342202772785027 | 1.0 | 5.0 |
| bathrooms | 1.4446275510716762 | 0.5477713573635926 | 0.0 | 9.0 |
| has_Tennis | 0.08566375594119598 | 0.27986829746043457 | 0.0 | 1.0 |
| square_feet | 956.05147864184 | 387.2192844112672 | 101.0 | 40000.0 |
| price | 1525.5443189605796 | 902.0558524870771 | 100.0 | 52500.0 |
| longitude | -91.48728659826962 | 15.737561003856273 | -124.2265 | -68.7788 |
| latitude | 36.934092578152516 | 4.560699440151283 | 24.5645 | 48.8467 |
| has_Parking | 0.44104022428328826 | 0.4965140703692728 | 0.0 | 1.0 |
| has_Alarm | 0.0036576665293366964 | 0.06036824185793644 | 0.0 | 1.0 |
| has_TV | 0.0452786961021735 | 0.20791577662422658 | 0.0 | 1.0 |
| has_Golf | 2.7131042937387584E-4 | 0.016469351705051993 | 0.0 | 1.0 |
| has_Playground | 0.11393028326818533 | 0.3177281672028879 | 0.0 | 1.0 |
| has_Clubhouse | 0.1923892400293417 | 0.3941791238298043 | 0.0 | 1.0 |
| has_Cable_or_Satellite | 0.12602871871137594 | 0.3318833945576695 | 0.0 | 1.0 |
| has_Refrigerator | 0.14987389089301326 | 0.3569495595114224 | 0.0 | 1.0 |
| has_Gated | 0.08717103610438418 | 0.28208694787488064 | 0.0 | 1.0 |
| has_Pool | 0.43822663464533695 | 0.49617187057161083 | 0.0 | 1.0 |
| has_Unknown | 0.15995257091753168 | 0.36656390464268557 | 0.0 | 1.0 |
| has_View | 0.02104163107810726 | 0.14352382321525303 | 0.0 | 1.0 |
| has_Elevator | 0.04360059085382397 | 0.20420577453855282 | 0.0 | 1.0 |
| has_Wood_Floors | 0.08925108272958389 | 0.2851072495882814 | 0.0 | 1.0 |
| has_Internet_Access | 0.11154878061034798 | 0.3148120805039674 | 0.0 | 1.0 |
| has_Hot_Tub | 0.04012379794406986 | 0.19625051794718615 | 0.0 | 1.0 |
| has_Storage | 0.21792256599374982 | 0.4128365703722296 | 0.0 | 1.0 |
| has_Gym | 0.3758955756302943 | 0.48435570528543753 | 0.0 | 1.0 |
| has_Patio/Deck | 0.2668790256941025 | 0.44233084608754325 | 0.0 | 1.0 |
| has_Washer_Dryer | 0.2620959233095853 | 0.43977675440699454 | 0.0 | 1.0 |
| has_Luxury | 0.002080046625199715 | 0.045560299487497735 | 0.0 | 1.0 |
| has_Dishwasher | 0.1667755257895636 | 0.37277720713600654 | 0.0 | 1.0 |
| has_Garbage_Disposal | 0.03893807088236181 | 0.19344837439825108 | 0.0 | 1.0 |
| has_Doorman | 0.002190580503833516 | 0.04675258094495654 | 0.0 | 1.0 |
| has_photo_no | 0.09274797270818051 | 0.2900803885426063 | 0.0 | 1.0 |
| has_AC | 0.15915873669825256 | 0.36582588483039546 | 0.0 | 1.0 |
| has_photo_yes | 0.5628184129344735 | 0.4960406429938882 | 0.0 | 1.0 |
| has_Fireplace | 0.15031602640754846 | 0.357382710879453 | 0.0 | 1.0 |
| pets_allowed_Yes | 0.9495161630676165 | 0.21894223170623736 | 0.0 | 1.0 |
# Built-in pandas-on-Spark summary of the same columns: adds count and the 25%/50%/75% quantiles to mean/std/min/max
numeric_df.describe()
| bathrooms | bedrooms | price | square_feet | latitude | longitude | week_of_month | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.00000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 | 99517.000000 |
| mean | 1.444628 | 1.725745 | 1525.544319 | 956.051479 | 36.934093 | -91.487287 | 3.161992 | 0.085664 | 0.441040 | 0.003658 | 0.000271 | 0.045279 | 0.192389 | 0.113930 | 0.149874 | 0.126029 | 0.159953 | 0.087171 | 0.438227 | 0.089251 | 0.111549 | 0.021042 | 0.043601 | 0.040124 | 0.375896 | 0.217923 | 0.002191 | 0.166776 | 0.262096 | 0.266879 | 0.038938 | 0.00208 | 0.159159 | 0.150316 | 0.092748 | 0.562818 | 0.949516 |
| std | 0.547771 | 0.750469 | 902.055852 | 387.219284 | 4.560699 | 15.737561 | 0.834220 | 0.279868 | 0.496514 | 0.060368 | 0.016469 | 0.207916 | 0.394179 | 0.317728 | 0.356950 | 0.331883 | 0.366564 | 0.282087 | 0.496172 | 0.285107 | 0.314812 | 0.143524 | 0.204206 | 0.196251 | 0.484356 | 0.412837 | 0.046753 | 0.372777 | 0.439777 | 0.442331 | 0.193448 | 0.04556 | 0.365826 | 0.357383 | 0.290080 | 0.496041 | 0.218942 |
| min | 0.000000 | 0.000000 | 100.000000 | 101.000000 | 24.564500 | -124.226500 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 1.000000 | 1.000000 | 1014.000000 | 730.000000 | 33.746500 | -104.791900 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 50% | 1.000000 | 2.000000 | 1350.000000 | 900.000000 | 37.213900 | -84.538200 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 |
| 75% | 2.000000 | 2.000000 | 1795.000000 | 1115.000000 | 39.955900 | -77.569900 | 4.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 |
| max | 9.000000 | 9.000000 | 52500.000000 | 40000.000000 | 48.846700 | -68.778800 | 5.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
Linear Regression across Dataset¶
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, when
from pyspark.ml.linalg import Vectors, VectorUDT
from pyspark.ml.feature import StandardScaler, VectorAssembler
from pyspark.ml.regression import LinearRegression
import pyspark.pandas as ps
# Native vector -> array conversion (Spark 3.0+). It replaces the Python UDF
# that needed DoubleType, which this cell never imported.
from pyspark.ml.functions import vector_to_array

# Ensure Spark session is created
spark = (
    SparkSession.builder
    .appName("Linear Regression with Manual Feature Assembly")
    .config("spark.sql.shuffle.partitions", "200")
    .getOrCreate()
)
# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()
# Define the target column
target_col = "price"
# Manually create one-hot encoded columns for week_of_month.
# Only weeks 1-4 get an indicator column. week_of_month goes up to 5 in this
# data, so week 5 rows have all four indicators at 0 and act as the
# reference category.
for i in range(1, 5):
    vector_df = vector_df.withColumn(f"week_{i}", when(col("week_of_month") == i, 1).otherwise(0))
# Scale 'square_feet'. StandardScaler works on vector columns, hence the
# one-column VectorAssembler. With its default settings the scaler divides
# by the standard deviation without centring on the mean.
assembler = VectorAssembler(inputCols=["square_feet"], outputCol="square_feet_vec")
scaler = StandardScaler(inputCol="square_feet_vec", outputCol="scaled_square_feet")
vector_df = assembler.transform(vector_df)
scaler_model = scaler.fit(vector_df)
vector_df = scaler_model.transform(vector_df)
# Replace the one-element vector with its plain double value
vector_df = vector_df.withColumn(
    "scaled_square_feet",
    vector_to_array(col("scaled_square_feet"))[0],
)
# Feature order; the coefficient table later pairs coefficients with these names by position
feature_cols = [
    'latitude', 'longitude',
    'has_Tennis', 'has_Parking', 'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse',
    'has_Playground', 'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
    'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access', 'has_View',
    'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage', 'has_Doorman',
    'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck', 'has_Garbage_Disposal',
    'has_Luxury', 'has_AC', 'has_Fireplace', 'has_photo_no', 'has_photo_yes',
    'pets_allowed_Yes',
    'bathrooms', 'bedrooms', 'scaled_square_feet',
    'week_1', 'week_2', 'week_3', 'week_4',
]


# UDF that packs the given column values into a single dense vector
@udf(returnType=VectorUDT())
def assemble_features(*cols):
    """Return a dense ML vector holding ``cols`` as floats, in argument order."""
    return Vectors.dense(list(map(float, cols)))


# Build the feature vector column
vector_df = vector_df.withColumn("features", assemble_features(*feature_cols))
# Keep only what the model needs: the feature vector and the target
train_data = vector_df.select("features", target_col)
# Peek at the first rows of the training data
print("\nStructure of train_data:")
train_data.show(5, truncate=False)
# Report how many features go into the model
num_features = len(feature_cols)
print(f"\nNumber of features in train_data: {num_features}")
# Cache the training data before it is used. The fit and the training-summary
# metrics below both scan it, and each uncached scan would re-run the Python
# feature-assembly UDF. This also makes the unpersist() at the end of the
# cell meaningful; before, it was called on data that had never been cached.
train_data.cache()
# Set up and train the Linear Regression model
lr = LinearRegression(featuresCol="features", labelCol=target_col)
lr_model = lr.fit(train_data)
# Extract coefficients and intercept
coefficients = lr_model.coefficients
intercept = lr_model.intercept
print(f"\nNumber of coefficients: {len(coefficients)}")
# Sanity check: there must be exactly one coefficient per feature name,
# because the table below pairs them up by position
if num_features == len(coefficients):
    print("The number of features matches the number of coefficients.")
else:
    print(f"Mismatch: {num_features} features vs {len(coefficients)} coefficients.")
# Get the summary of the model
training_summary = lr_model.summary
# Extract summary statistics
r_squared = training_summary.r2
adjusted_r_squared = training_summary.r2adj
rmse = training_summary.rootMeanSquaredError
mae = training_summary.meanAbsoluteError
explained_variance = training_summary.explainedVariance
# Create a pandas-on-Spark DataFrame for coefficients including the intercept
coeff_psdf = ps.DataFrame({
    "Feature": feature_cols + ["Intercept"],
    "Coefficient": list(coefficients) + [intercept],
})
# Sort by absolute coefficient size via a temporary helper column
coeff_psdf['abs_coefficient'] = coeff_psdf['Coefficient'].abs()
coeff_psdf = coeff_psdf.sort_values(by='abs_coefficient', ascending=False).drop(columns=['abs_coefficient'])
# Create a pandas-on-Spark DataFrame for summary statistics
summary_stats = {
    "R-squared": [r_squared],
    "Adjusted R-squared": [adjusted_r_squared],
    "RMSE": [rmse],
    "MAE": [mae],
    "Explained Variance": [explained_variance],
    "Total Iterations": [training_summary.totalIterations],
}
summary_stats_psdf = ps.DataFrame(summary_stats)
# Display the DataFrames
print("\nCoefficients:")
print(coeff_psdf)
print("\nModel Summary Statistics:")
print(summary_stats_psdf)
# Release the cache now that every metric has been read
train_data.unpersist()
Structure of train_data: +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+ |features |price | +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+ |[39.0342,-94.5429,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,3.0,6.456290016136541,0.0,0.0,0.0,1.0] |800.0 | |[37.5423,-77.4347,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,2.0,2.0,2.259701505647789,0.0,0.0,0.0,1.0] |1000.0| |[33.9222,-84.0725,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0143624850346007,0.0,0.0,1.0,0.0]|1017.0| |[36.1599,-78.8975,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.879505347196897,0.0,0.0,1.0,0.0] |1023.0| |[28.0395,-82.3952,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.5,2.0,3.3572708083910014,0.0,0.0,0.0,1.0]|1025.0| +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+ only showing top 5 rows Number of features in train_data: 39
Downloading artifacts: 0%| | 0/15 [00:00<?, ?it/s]
Uploading artifacts: 0%| | 0/4 [00:00<?, ?it/s]
Number of coefficients: 39
The number of features matches the number of coefficients.
Coefficients:
Feature Coefficient
21 has_Doorman 1148.716768
39 Intercept -714.878015
17 has_Elevator 499.418298
34 scaled_square_feet 332.941781
32 bathrooms 263.245148
14 has_Wood_Floors 237.347215
8 has_Playground -202.845298
4 has_Alarm 201.098182
16 has_View 198.102579
25 has_Garbage_Disposal -156.083536
38 week_4 -152.516798
7 has_Clubhouse -136.133296
33 bedrooms -119.110003
5 has_Golf 115.999245
36 week_2 -98.548117
6 has_TV 97.574141
19 has_Gym 90.706536
23 has_Washer_Dryer -88.462153
28 has_Fireplace -73.894957
29 has_photo_no 65.849407
10 has_Cable_or_Satellite -59.640026
24 has_Patio/Deck 58.142950
35 week_1 56.487169
20 has_Storage -44.021378
26 has_Luxury 42.007378
18 has_Hot_Tub 39.875829
9 has_Refrigerator -38.136788
3 has_Parking 26.813819
37 week_3 19.927478
15 has_Internet_Access 18.751996
27 has_AC -16.719452
2 has_Tennis -15.076323
12 has_Gated 14.511717
0 latitude 14.481363
30 has_photo_yes 10.668124
13 has_Pool -8.475591
1 longitude -8.217182
11 has_Unknown 4.414043
31 pets_allowed_Yes 1.477132
22 has_Dishwasher -0.199086
Model Summary Statistics:
R-squared Adjusted R-squared RMSE MAE Explained Variance Total Iterations
0 0.249791 0.249497 781.308281 477.632184 203253.954134 0
DataFrame[features: vector, price: double]
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, udf, monotonically_increasing_id
from pyspark.sql.types import DoubleType
from pyspark.ml.feature import StandardScaler, VectorAssembler
from pyspark.ml.linalg import VectorUDT
import pyspark.pandas as ps
# Name of the regression target column.
target_col = "price"

# Get (or create) the Spark session used for the preparation step.
spark = (
    SparkSession.builder
    .appName("Feature Preparation for Analysis")
    .config("spark.sql.shuffle.partitions", "200")
    .getOrCreate()
)

# Switch from pandas-on-Spark to a plain Spark DataFrame and tag each row
# with an id column.
# NOTE(review): monotonically_increasing_id() depends on partitioning, and
# 'unique_id' is used as a join key further down - confirm the ids stay stable
# if this lineage is recomputed.
vector_df = numeric_df.to_spark().withColumn(
    "unique_id", monotonically_increasing_id()
)

# Hand-rolled indicator columns week_1 .. week_4 for week_of_month
# (the code assumes the weeks are numbered 1-4).
for i in range(1, 5):
    vector_df = vector_df.withColumn(
        f"week_{i}",
        when(col("week_of_month") == i, 1).otherwise(0),
    )

# Scale 'square_feet': StandardScaler works on vector columns, so the single
# column is first wrapped in a one-element vector.
assembler = VectorAssembler(inputCols=["square_feet"], outputCol="square_feet_vec")
scaler = StandardScaler(inputCol="square_feet_vec", outputCol="scaled_square_feet")
vector_df = assembler.transform(vector_df)
scaler_model = scaler.fit(vector_df)
vector_df = scaler_model.transform(vector_df)
# UDF that unwraps a one-element vector into a plain double.
@udf(returnType=DoubleType())
def extract_from_vector(v):
    """Return element 0 of ``v`` converted to a Python float."""
    first_component = v[0]
    return float(first_component)


# Overwrite the vector-valued 'scaled_square_feet' column with its scalar value.
vector_df = vector_df.withColumn(
    "scaled_square_feet",
    extract_from_vector(col("scaled_square_feet")),
)
# Columns carried into the modelling table: the row id, the coordinates, the
# has_* / pets_allowed indicator columns, the room counts, the scaled area,
# the week indicators and finally the target column.
columns_to_keep = [
    'unique_id',
    'latitude', 'longitude',
    'has_Tennis', 'has_Parking', 'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse',
    'has_Playground', 'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
    'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access', 'has_View',
    'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage', 'has_Doorman',
    'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck', 'has_Garbage_Disposal',
    'has_Luxury', 'has_AC', 'has_Fireplace', 'has_photo_no', 'has_photo_yes',
    'pets_allowed_Yes',
    'bathrooms', 'bedrooms', 'scaled_square_feet',
    'week_1', 'week_2', 'week_3', 'week_4',
    target_col,
]

# Create the final DataFrame with only the columns we need
final_df = vector_df.select(columns_to_keep)

# Convert to pandas-on-Spark DataFrame for easier viewing
final_psdf = final_df.pandas_api()

# info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" to the output.
final_psdf.info()

# Display the first few rows of the final DataFrame (shown as the cell result
# because it is the last expression).
print("\nFirst few rows of the final DataFrame:")
final_psdf.head()
# Save the final DataFrame for further analysis
# Uncomment the following line if you want to save it as a CSV file
# final_psdf.to_csv('prepared_rental_data.csv', index=False)
<class 'pyspark.pandas.frame.DataFrame'> Int64Index: 99517 entries, 0 to 95562 Data columns (total 41 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 unique_id 99517 non-null int64 1 latitude 99517 non-null float64 2 longitude 99517 non-null float64 3 has_Tennis 99517 non-null int32 4 has_Parking 99517 non-null int32 5 has_Alarm 99517 non-null int32 6 has_Golf 99517 non-null int32 7 has_TV 99517 non-null int32 8 has_Clubhouse 99517 non-null int32 9 has_Playground 99517 non-null int32 10 has_Refrigerator 99517 non-null int32 11 has_Cable_or_Satellite 99517 non-null int32 12 has_Unknown 99517 non-null int32 13 has_Gated 99517 non-null int32 14 has_Pool 99517 non-null int32 15 has_Wood_Floors 99517 non-null int32 16 has_Internet_Access 99517 non-null int32 17 has_View 99517 non-null int32 18 has_Elevator 99517 non-null int32 19 has_Hot_Tub 99517 non-null int32 20 has_Gym 99517 non-null int32 21 has_Storage 99517 non-null int32 22 has_Doorman 99517 non-null int32 23 has_Dishwasher 99517 non-null int32 24 has_Washer_Dryer 99517 non-null int32 25 has_Patio/Deck 99517 non-null int32 26 has_Garbage_Disposal 99517 non-null int32 27 has_Luxury 99517 non-null int32 28 has_AC 99517 non-null int32 29 has_Fireplace 99517 non-null int32 30 has_photo_no 99517 non-null int32 31 has_photo_yes 99517 non-null int32 32 pets_allowed_Yes 99517 non-null int32 33 bathrooms 99517 non-null float64 34 bedrooms 99517 non-null int32 35 scaled_square_feet 99517 non-null float64 36 week_1 99517 non-null int32 37 week_2 99517 non-null int32 38 week_3 99517 non-null int32 39 week_4 99517 non-null int32 40 price 99517 non-null float64 dtypes: float64(5), int32(35), int64(1)None First few rows of the final DataFrame:
| unique_id | latitude | longitude | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | bathrooms | bedrooms | scaled_square_feet | week_1 | week_2 | week_3 | week_4 | price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 39.0342 | -94.5429 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1.0 | 3 | 6.456290 | 0 | 0 | 0 | 1 | 800.0 |
| 1 | 1 | 37.5423 | -77.4347 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 1 | 2.0 | 2 | 2.259702 | 0 | 0 | 0 | 1 | 1000.0 |
| 2 | 2 | 33.9222 | -84.0725 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1.0 | 1 | 2.014362 | 0 | 0 | 1 | 0 | 1017.0 |
| 3 | 3 | 36.1599 | -78.8975 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 2.0 | 2 | 2.879505 | 0 | 0 | 1 | 0 | 1023.0 |
| 4 | 4 | 28.0395 | -82.3952 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1.5 | 2 | 3.357271 | 0 | 0 | 0 | 1 | 1025.0 |
# Render the summary-statistics table and the coefficient table via their
# .display() method (both frames are built outside this excerpt).
summary_stats_psdf.display()
coeff_psdf.display()
| R-squared | Adjusted R-squared | RMSE | MAE | Explained Variance | Total Iterations |
|---|---|---|---|---|---|
| 0.2497908409776205 | 0.24949672115895016 | 781.3082812382936 | 477.6321843139007 | 203253.95413435763 | 0 |
| Feature | Coefficient |
|---|---|
| has_Doorman | 1148.7167676165839 |
| Intercept | -714.878014571396 |
| has_Elevator | 499.41829834028454 |
| scaled_square_feet | 332.94178089917335 |
| bathrooms | 263.2451481138218 |
| has_Wood_Floors | 237.34721530559665 |
| has_Playground | -202.84529822548024 |
| has_Alarm | 201.09818175498555 |
| has_View | 198.1025786087673 |
| has_Garbage_Disposal | -156.08353562283395 |
| week_4 | -152.51679771144896 |
| has_Clubhouse | -136.13329618871902 |
| bedrooms | -119.11000269091117 |
| has_Golf | 115.99924525105642 |
| week_2 | -98.54811710308732 |
| has_TV | 97.57414134223637 |
| has_Gym | 90.706535685193 |
| has_Washer_Dryer | -88.46215297666268 |
| has_Fireplace | -73.89495668117976 |
| has_photo_no | 65.84940700298462 |
| has_Cable_or_Satellite | -59.640025604448525 |
| has_Patio/Deck | 58.14295008391875 |
| week_1 | 56.48716931487229 |
| has_Storage | -44.021378199388494 |
| has_Luxury | 42.00737779701831 |
| has_Hot_Tub | 39.87582885358747 |
| has_Refrigerator | -38.1367880892332 |
| has_Parking | 26.81381941385856 |
| week_3 | 19.927477921930628 |
| has_Internet_Access | 18.75199634433781 |
| has_AC | -16.719452095245575 |
| has_Tennis | -15.076323314998662 |
| has_Gated | 14.511716773885327 |
| latitude | 14.481362792222722 |
| has_photo_yes | 10.668123624570054 |
| has_Pool | -8.475591367472179 |
| longitude | -8.217181919323505 |
| has_Unknown | 4.4140434411031215 |
| pets_allowed_Yes | 1.4771320751831025 |
| has_Dishwasher | -0.19908565558793928 |
Scaling Data, Silhouette, Kmeans and Clustering¶
We ran into performance problems in this step, which we addressed by caching the Spark DataFrame and by evaluating the candidate values of k in parallel with a thread pool.
import pyspark.pandas as ps
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.clustering import KMeans
from pyspark.ml.evaluation import ClusteringEvaluator
from pyspark.sql import SparkSession
import time
from concurrent.futures import ThreadPoolExecutor
# Reuse the Spark session that is already running.
spark = SparkSession.builder.getOrCreate()

# Clustering only needs the row id and the two coordinates.
# (This rebinds the notebook-level name `df`.)
df = final_psdf[['unique_id', 'latitude', 'longitude']]

# MLlib estimators work on Spark DataFrames with a vector column, so convert
# the pandas-on-Spark frame and pack latitude/longitude into 'features'.
vector_assembler = VectorAssembler(inputCols=['latitude', 'longitude'], outputCol='features')

# cache() marks the frame for reuse and returns the same frame; the k-means
# search below fits against it once per candidate k.
df_spark = vector_assembler.transform(df.to_spark()).cache()
# Step 3: fit one k-means model and score it
def compute_metrics(k, df_spark):
    """Fit k-means with ``k`` clusters on ``df_spark`` and score the result.

    Args:
        k: number of clusters to fit.
        df_spark: Spark DataFrame passed to ``KMeans.fit`` and
            ``model.transform``.

    Returns:
        Tuple ``(k, silhouette, wcss, elapsed_time)`` where ``silhouette`` is
        the value returned by a default ``ClusteringEvaluator``, ``wcss`` is
        the model summary's ``trainingCost`` and ``elapsed_time`` is the
        wall-clock duration in seconds of fit + transform + evaluation.
    """
    evaluator = ClusteringEvaluator()

    started_at = time.time()
    fitted = KMeans(k=k, seed=42).fit(df_spark)
    silhouette = evaluator.evaluate(fitted.transform(df_spark))
    wcss = fitted.summary.trainingCost
    elapsed_time = time.time() - started_at

    return k, silhouette, wcss, elapsed_time
# Step 4: score every candidate k, several at a time
def parallel_compute_metrics(k):
    """Single-argument adapter so ``executor.map`` can call ``compute_metrics``
    against the notebook-level ``df_spark``."""
    return compute_metrics(k, df_spark)


# Candidate cluster counts: 2 through 80 inclusive.
k_values = [*range(2, 81)]

metrics = []
# The threads only submit Spark jobs; executor.map yields results in the same
# order as k_values. Adjust max_workers based on your cluster.
with ThreadPoolExecutor(max_workers=8) as executor:
    results = [*executor.map(parallel_compute_metrics, k_values)]
metrics = results

# Report the score and timing for each k
for k, silhouette, wcss, time_taken in metrics:
    print(f"k: {k}, Silhouette Score: {silhouette}, WCSS: {wcss}, Time Taken: {time_taken} seconds")

# Step 5: the optimal k is the one with the highest silhouette score
# (max() keeps the first entry on ties).
optimal_k, *_ = max(metrics, key=lambda entry: entry[1])
print(f"Optimal k based on Silhouette Score: {optimal_k}")
# Perform k-means clustering with the optimal k
kmeans = KMeans(k=optimal_k, seed=42)

# Keep a handle on the cached input frame: `df_spark` is rebound to the
# transformed frame below, and unpersist() must be called on the frame that
# was actually cached, otherwise the cache is never released.
cached_features_df = df_spark
model = kmeans.fit(cached_features_df)
df_spark = model.transform(cached_features_df)

# Convert back to pandas-on-Spark DataFrame if needed
result_df = ps.DataFrame(df_spark.select('unique_id', 'latitude', 'longitude', 'prediction'))

# Show the results
result_df.head()

# Clean up: release the cached input frame.
# NOTE(review): result_df is evaluated lazily, so later cells that use it will
# recompute the pipeline without this cache; move this call after the last
# use of result_df if that recomputation turns out to be too slow.
cached_features_df.unpersist()
Downloading artifacts: 0%| | 0/15 [00:00<?, ?it/s]
Uploading artifacts: 0%| | 0/4 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/15 [00:00<?, ?it/s]
Uploading artifacts: 0%| | 0/4 [00:00<?, ?it/s]
k: 2, Silhouette Score: 0.763449034248761, WCSS: 7819512.118361829, Time Taken: 12.302191257476807 seconds k: 3, Silhouette Score: 0.7066676965898263, WCSS: 4130639.958093457, Time Taken: 56.06333518028259 seconds k: 4, Silhouette Score: 0.626529195061003, WCSS: 3066695.262633371, Time Taken: 12.9237060546875 seconds k: 5, Silhouette Score: 0.6570225770709357, WCSS: 2184460.5820053252, Time Taken: 12.365989923477173 seconds k: 6, Silhouette Score: 0.674843772590051, WCSS: 1734666.9166672565, Time Taken: 12.941851377487183 seconds k: 7, Silhouette Score: 0.6453930030584214, WCSS: 1369393.9327533566, Time Taken: 13.146583557128906 seconds k: 8, Silhouette Score: 0.6668562607779775, WCSS: 1375699.5277522383, Time Taken: 12.790895223617554 seconds k: 9, Silhouette Score: 0.7479653824112578, WCSS: 796316.6247356371, Time Taken: 14.169133424758911 seconds k: 10, Silhouette Score: 0.718012839465669, WCSS: 797762.747828376, Time Taken: 6.459659814834595 seconds k: 11, Silhouette Score: 0.7081238587805871, WCSS: 757223.6080651081, Time Taken: 6.1843955516815186 seconds k: 12, Silhouette Score: 0.7297178584358022, WCSS: 526125.8994965373, Time Taken: 5.58376669883728 seconds k: 13, Silhouette Score: 0.7730932477821403, WCSS: 459962.5308252313, Time Taken: 7.0191755294799805 seconds k: 14, Silhouette Score: 0.6973491100003041, WCSS: 510970.3514817107, Time Taken: 6.999093055725098 seconds k: 15, Silhouette Score: 0.7488379733628568, WCSS: 369717.7783567335, Time Taken: 6.379661798477173 seconds k: 16, Silhouette Score: 0.7576676038945148, WCSS: 315268.5092324907, Time Taken: 5.886139869689941 seconds k: 17, Silhouette Score: 0.7752504884542833, WCSS: 291043.90755458834, Time Taken: 6.939961910247803 seconds k: 18, Silhouette Score: 0.7818385047999875, WCSS: 255229.731579469, Time Taken: 8.048190116882324 seconds k: 19, Silhouette Score: 0.7680298830490281, WCSS: 357955.1545308105, Time Taken: 5.2976391315460205 seconds k: 20, Silhouette Score: 0.7926414459045124, WCSS: 
208535.88127667134, Time Taken: 9.10690450668335 seconds k: 21, Silhouette Score: 0.7789597054791254, WCSS: 205638.80449035042, Time Taken: 9.247199058532715 seconds k: 22, Silhouette Score: 0.7977354965000903, WCSS: 183427.20801378967, Time Taken: 8.668189525604248 seconds k: 23, Silhouette Score: 0.8046357428406448, WCSS: 183530.45723809785, Time Taken: 6.809260845184326 seconds k: 24, Silhouette Score: 0.7439339669693564, WCSS: 197258.19261235185, Time Taken: 7.405336618423462 seconds k: 25, Silhouette Score: 0.7767917401232765, WCSS: 169568.61301595188, Time Taken: 6.740826606750488 seconds k: 26, Silhouette Score: 0.7932025975963954, WCSS: 148493.87824072098, Time Taken: 8.219924449920654 seconds k: 27, Silhouette Score: 0.7960708453693889, WCSS: 164643.6345777706, Time Taken: 7.513131618499756 seconds k: 28, Silhouette Score: 0.8340780235239595, WCSS: 118442.9851823928, Time Taken: 7.094217300415039 seconds k: 29, Silhouette Score: 0.8330482192122055, WCSS: 116934.47893890175, Time Taken: 7.004460096359253 seconds k: 30, Silhouette Score: 0.8171113636663343, WCSS: 110803.65912103826, Time Taken: 8.085708379745483 seconds k: 31, Silhouette Score: 0.816601895192477, WCSS: 108347.51366863374, Time Taken: 6.879997253417969 seconds k: 32, Silhouette Score: 0.8018064277801296, WCSS: 102294.49071926378, Time Taken: 6.232992887496948 seconds k: 33, Silhouette Score: 0.804207924790133, WCSS: 96471.49709637574, Time Taken: 7.1942408084869385 seconds k: 34, Silhouette Score: 0.8279710541479418, WCSS: 87438.47453428664, Time Taken: 8.69472885131836 seconds k: 35, Silhouette Score: 0.8159013280217633, WCSS: 92001.43781252524, Time Taken: 6.730831623077393 seconds k: 36, Silhouette Score: 0.797526439419168, WCSS: 78505.13722900697, Time Taken: 8.021164178848267 seconds k: 37, Silhouette Score: 0.8087660877390394, WCSS: 78845.45651923312, Time Taken: 7.640315055847168 seconds k: 38, Silhouette Score: 0.7971209280863393, WCSS: 74982.47259881966, Time Taken: 7.682674884796143 
seconds k: 39, Silhouette Score: 0.7864781414727542, WCSS: 77008.91499213867, Time Taken: 8.548113584518433 seconds k: 40, Silhouette Score: 0.792104573140613, WCSS: 73386.27450628248, Time Taken: 7.544145345687866 seconds k: 41, Silhouette Score: 0.7814244271658701, WCSS: 73820.90253733854, Time Taken: 7.601768970489502 seconds k: 42, Silhouette Score: 0.7754729563063464, WCSS: 73284.59884842591, Time Taken: 7.47021484375 seconds k: 43, Silhouette Score: 0.7913512716659951, WCSS: 65430.468663536, Time Taken: 12.877533435821533 seconds k: 44, Silhouette Score: 0.7548145174390559, WCSS: 70038.20725082759, Time Taken: 17.766409397125244 seconds k: 45, Silhouette Score: 0.8200907979124444, WCSS: 57558.94401610538, Time Taken: 15.773146629333496 seconds k: 46, Silhouette Score: 0.8068605977546413, WCSS: 52397.45705784256, Time Taken: 16.517797708511353 seconds k: 47, Silhouette Score: 0.7900390667040961, WCSS: 54484.19707486911, Time Taken: 17.90798020362854 seconds k: 48, Silhouette Score: 0.8003772678172559, WCSS: 54661.586510008055, Time Taken: 17.240264177322388 seconds k: 49, Silhouette Score: 0.7948315777622762, WCSS: 50583.71699534971, Time Taken: 55.32450461387634 seconds k: 50, Silhouette Score: 0.7983751067184075, WCSS: 50776.23331974545, Time Taken: 12.305404663085938 seconds k: 51, Silhouette Score: 0.791669989266832, WCSS: 51170.07619852613, Time Taken: 12.535984992980957 seconds k: 52, Silhouette Score: 0.7976659288060297, WCSS: 45421.276155812055, Time Taken: 8.632468938827515 seconds k: 53, Silhouette Score: 0.7989224828194511, WCSS: 44566.488136494816, Time Taken: 8.577142477035522 seconds k: 54, Silhouette Score: 0.8293581326510951, WCSS: 42119.00802898271, Time Taken: 8.389490842819214 seconds k: 55, Silhouette Score: 0.789892762163796, WCSS: 41991.56417328655, Time Taken: 7.896442174911499 seconds k: 56, Silhouette Score: 0.8232711515625952, WCSS: 40491.63861324899, Time Taken: 7.8990044593811035 seconds k: 57, Silhouette Score: 0.8008040468122573, 
WCSS: 40499.83601823361, Time Taken: 8.842151880264282 seconds k: 58, Silhouette Score: 0.8118856034733605, WCSS: 39648.87586369499, Time Taken: 11.153058052062988 seconds k: 59, Silhouette Score: 0.8237654276641893, WCSS: 36595.60467797983, Time Taken: 7.658572673797607 seconds k: 60, Silhouette Score: 0.7695358885552273, WCSS: 37260.22008281755, Time Taken: 8.92990756034851 seconds k: 61, Silhouette Score: 0.8236054588532493, WCSS: 33595.74126109876, Time Taken: 6.885226488113403 seconds k: 62, Silhouette Score: 0.8287085422450893, WCSS: 33674.46643113743, Time Taken: 9.012094497680664 seconds k: 63, Silhouette Score: 0.76297484583599, WCSS: 35843.76721882181, Time Taken: 7.951349496841431 seconds k: 64, Silhouette Score: 0.8021408707097821, WCSS: 30941.980445092824, Time Taken: 9.699987888336182 seconds k: 65, Silhouette Score: 0.7853608273727848, WCSS: 31649.967949477163, Time Taken: 8.849440574645996 seconds k: 66, Silhouette Score: 0.7834194644326498, WCSS: 31115.104687594416, Time Taken: 8.718227624893188 seconds k: 67, Silhouette Score: 0.7879665478399728, WCSS: 31607.411451211618, Time Taken: 9.10171127319336 seconds k: 68, Silhouette Score: 0.7778732609058202, WCSS: 28836.54567989254, Time Taken: 11.061966896057129 seconds k: 69, Silhouette Score: 0.8419497380913247, WCSS: 27511.94290020385, Time Taken: 11.135072469711304 seconds k: 70, Silhouette Score: 0.8095064999875181, WCSS: 29686.801885118515, Time Taken: 10.736343145370483 seconds k: 71, Silhouette Score: 0.7988862098544455, WCSS: 28315.54293937125, Time Taken: 10.754753351211548 seconds k: 72, Silhouette Score: 0.811370345459618, WCSS: 27334.985435366365, Time Taken: 9.813760042190552 seconds k: 73, Silhouette Score: 0.7812016940571781, WCSS: 25783.89865789666, Time Taken: 9.789621591567993 seconds k: 74, Silhouette Score: 0.7706518260519104, WCSS: 28332.927369483383, Time Taken: 9.29653263092041 seconds k: 75, Silhouette Score: 0.8020910123059354, WCSS: 27701.59025749798, Time Taken: 
9.8978910446167 seconds k: 76, Silhouette Score: 0.8021665858204504, WCSS: 27578.033873043816, Time Taken: 9.199559211730957 seconds k: 77, Silhouette Score: 0.79936549542132, WCSS: 23995.97006583454, Time Taken: 11.353132247924805 seconds k: 78, Silhouette Score: 0.7871390924737632, WCSS: 24333.514848260787, Time Taken: 12.008180141448975 seconds k: 79, Silhouette Score: 0.7912961047457151, WCSS: 24040.755965282333, Time Taken: 8.763015031814575 seconds k: 80, Silhouette Score: 0.8051453111991831, WCSS: 24745.50750979829, Time Taken: 12.011964082717896 seconds Optimal k based on Silhouette Score: 69
Downloading artifacts: 0%| | 0/15 [00:00<?, ?it/s]
Uploading artifacts: 0%| | 0/4 [00:00<?, ?it/s]
DataFrame[unique_id: bigint, latitude: double, longitude: double, features: vector, prediction: int]
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import silhouette_samples
# Split the metric tuples into per-column sequences; the timing column is not
# plotted.
k_values, silhouette_values, wcss_values, _ = zip(*metrics)

# Plot 1 (silhouette score vs. k) and Plot 2 (elbow plot of WCSS vs. k) share
# the same layout, so both are drawn by one loop.
for series, line_format, y_label, plot_title in (
    (silhouette_values, 'bo-', 'Silhouette Score',
     'Silhouette Score vs. Number of Clusters'),
    (wcss_values, 'ro-', 'Within-Cluster Sum of Squares (WCSS)',
     'Elbow Method for Optimal k'),
):
    plt.figure(figsize=(12, 6))
    plt.plot(k_values, series, line_format)
    plt.xlabel('Number of Clusters (k)')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.grid(True)
    plt.show()
# Plot 3: Clustered Data Points
plt.figure(figsize=(12, 10))  # Increased figure height to accommodate legend below

# Collect the three columns in ONE pass. Calling .to_numpy() on each
# pandas-on-Spark column separately runs three Spark jobs and relies on every
# job returning the rows in the same order for the arrays to stay aligned;
# slicing one local pandas frame keeps them aligned by construction.
plot_pdf = result_df[['prediction', 'longitude', 'latitude']].to_pandas()
predictions = plot_pdf['prediction'].to_numpy()
longitudes = plot_pdf['longitude'].to_numpy()
latitudes = plot_pdf['latitude'].to_numpy()

# Get unique cluster labels and sort them
unique_labels = sorted(np.unique(predictions))

# One colour per label that actually occurs in the data
colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))

# Plot each cluster
for label, color in zip(unique_labels, colors):
    mask = predictions == label
    plt.scatter(longitudes[mask], latitudes[mask],
                c=[color], label=f'Cluster {label}', alpha=0.6)

plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title(f'K-Means Clustering Results (k={optimal_k})')

# Sort the legend entries numerically by the cluster number at the end of
# each label ("Cluster 10" after "Cluster 9")
handles, labels = plt.gca().get_legend_handles_labels()
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: int(t[0].split()[-1])))

# Place legend below the chart
plt.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, -0.05),
           fancybox=True, shadow=True, ncol=5)  # Adjust ncol as needed
plt.grid(True)
plt.tight_layout()  # Adjust the plot to ensure all elements are visible
plt.show()
# Plot 4: Silhouette Plot
plt.figure(figsize=(12, 8))

# Compute silhouette scores for each sample
silhouette_samples_values = silhouette_samples(np.column_stack((longitudes, latitudes)), predictions)

y_lower = 10
# Iterate over the labels that actually occur, paired with the colour assigned
# to each of them, instead of range(optimal_k) with colors[i]: `colors` has
# one entry per occurring label, so indexing it by cluster number fails (or
# picks the wrong colour) whenever fewer than optimal_k labels are present.
for i, color in zip(unique_labels, colors):
    # Aggregate the silhouette scores for samples belonging to cluster i
    ith_cluster_silhouette_values = silhouette_samples_values[predictions == i]
    ith_cluster_silhouette_values.sort()

    size_cluster_i = ith_cluster_silhouette_values.shape[0]
    y_upper = y_lower + size_cluster_i

    plt.fill_betweenx(np.arange(y_lower, y_upper),
                      0, ith_cluster_silhouette_values,
                      facecolor=color, edgecolor=color, alpha=0.7)

    # Label the silhouette plots with their cluster numbers at the middle
    plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

    # Compute the new y_lower for next plot
    y_lower = y_upper + 10  # leave a gap of 10 rows between clusters

plt.title("The silhouette plot for the various clusters.")
plt.xlabel("The silhouette coefficient values")
plt.ylabel("Cluster label")

# The vertical line for average silhouette score of all the values
plt.axvline(x=np.mean(silhouette_samples_values), color="red", linestyle="--")

plt.yticks([])  # Clear the yaxis labels / ticks
plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
plt.tight_layout()
plt.show()
# Optional: Save the plots
# plt.savefig('silhouette_scores.png')
# plt.savefig('elbow_plot.png')
# plt.savefig('clustering_results.png', bbox_inches='tight') # Ensure legend is included when saving
# plt.savefig('silhouette_plot.png', bbox_inches='tight')
import pyspark.pandas as ps
# Only the join key and the cluster label are needed from the k-means result.
result_psdf = result_df[['unique_id', 'prediction']]

# Attach each row's cluster label to the prepared feature table. The left
# join keeps every row of final_psdf.
# (A variant merging into numeric_df instead was left disabled in the
# original cell.)
clustered_df = final_psdf.merge(result_psdf, on='unique_id', how='left')

# Show the first few rows of the result
display(clustered_df.head())

# Per-cluster row count and latitude/longitude mean, min and max
coordinate_summary = ['mean', 'min', 'max']
cluster_stats = (
    clustered_df
    .groupby('prediction')
    .agg({
        'unique_id': 'count',
        'latitude': coordinate_summary,
        'longitude': coordinate_summary,
    })
    .reset_index()
)

print("\nCluster Statistics:")
print(cluster_stats)
# Optional: Save the clustered dataframe
# clustered_df.to_csv('clustered_data.csv', index=False)
| unique_id | latitude | longitude | has_Tennis | has_Parking | has_Alarm | has_Golf | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | bathrooms | bedrooms | scaled_square_feet | week_1 | week_2 | week_3 | week_4 | price | prediction |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39.05 | -84.3439 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1.0 | 1 | 1.9962848729894185 | 0 | 0 | 1 | 0 | 1000.0 | 3 |
| 1 | 35.9165 | -78.9177 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1.0 | 1 | 2.174478477434787 | 0 | 1 | 0 | 0 | 1008.0 | 58 |
| 2 | 41.4779 | -87.3058 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1.0 | 1 | 1.8542464926344144 | 0 | 1 | 0 | 0 | 1019.0 | 43 |
| 3 | 32.91 | -97.5572 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1.0 | 1 | 1.8723241046795969 | 0 | 0 | 1 | 0 | 1025.0 | 13 |
| 4 | 32.7767 | -97.0816 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0.0 | 1 | 1.9652946809119631 | 0 | 0 | 1 | 0 | 1040.0 | 13 |
Cluster Statistics:
prediction unique_id latitude longitude
count mean min max mean min max
0 31 964 35.458441 33.8994 36.4297 -97.438064 -99.3992 -95.7854
1 65 928 37.676690 36.5877 38.5392 -122.150870 -122.9650 -120.8502
2 53 123 36.288060 34.6530 37.3395 -119.723941 -120.9770 -118.9327
3 34 3739 40.752542 40.0711 41.9398 -74.146435 -75.5692 -73.0548
4 28 410 47.151638 44.3600 48.8467 -96.939593 -99.1233 -94.8577
5 27 1120 41.298523 40.6246 43.7274 -96.257838 -99.0865 -95.2202
6 26 679 26.400779 24.5645 27.0783 -81.821737 -82.3809 -81.5339
7 44 2562 47.587022 46.2858 48.7871 -122.186509 -123.0586 -119.2833
8 12 749 38.689381 37.6656 40.9288 -121.466670 -124.2265 -120.9456
9 22 602 35.415762 34.1847 37.0926 -82.970606 -84.3648 -81.8052
10 47 516 38.696588 37.3130 40.4592 -90.551110 -92.3696 -88.3730
11 1 4397 34.052055 33.7435 35.6241 -118.336423 -119.7428 -117.6882
12 52 75 47.082651 45.6661 47.9717 -115.906452 -118.9707 -114.0109
13 13 7679 32.857835 31.7894 33.8169 -96.794428 -98.1898 -94.6849
14 16 5782 42.398213 41.4764 44.8163 -71.161118 -72.0320 -68.7788
15 6 311 45.190544 42.1583 45.7941 -122.695243 -123.3734 -121.2030
16 3 2194 39.164440 37.6675 40.8297 -84.403614 -85.4000 -82.7679
17 20 291 27.682800 26.1596 28.8584 -97.444457 -99.5123 -97.0014
18 40 58 45.907878 42.5425 47.4759 -109.852955 -112.0096 -108.1832
19 57 201 39.033606 38.4093 39.2115 -96.035594 -96.8382 -95.6403
20 54 1215 33.931516 33.4810 34.8870 -117.158565 -117.6097 -115.7200
21 48 652 32.231140 31.3517 35.1565 -110.837331 -111.2942 -106.3886
22 5 6320 39.666363 35.1038 42.1244 -104.975425 -108.5479 -102.8964
23 19 1222 30.371511 29.5746 32.4200 -91.021634 -93.2763 -88.5209
24 64 350 32.811575 31.8744 34.1751 -80.279864 -82.1391 -79.4699
25 41 351 41.848046 41.0055 43.0597 -92.802444 -94.1803 -90.3928
26 15 1096 42.248487 40.8623 46.4805 -83.794953 -85.9328 -82.4822
27 43 1540 42.039469 40.0656 46.5500 -87.997466 -89.7165 -85.8527
28 37 808 40.710100 39.1083 43.6865 -111.842660 -114.0215 -108.6071
29 61 143 44.270130 42.9236 44.9449 -72.988917 -74.9555 -71.4988
30 17 734 40.472663 38.9239 42.1100 -80.015422 -80.7039 -78.4057
31 9 74 33.602236 31.8666 37.0439 -101.624292 -103.2484 -99.7024
32 35 83 43.167627 40.6194 43.6620 -116.283498 -116.9378 -113.2907
33 4 1326 28.494135 27.6361 29.2867 -81.498701 -82.5222 -80.4261
34 59 391 30.261091 29.4478 31.8486 -81.680929 -83.3267 -81.1211
35 55 267 36.522928 35.1878 37.4133 -93.850739 -94.7725 -92.5940
36 8 575 44.966720 43.7973 47.1165 -93.191948 -94.3728 -88.5631
37 23 2275 37.623038 36.4531 38.6310 -77.582103 -79.1370 -77.2433
38 39 676 30.341089 29.8796 31.7509 -97.744829 -98.9677 -96.9336
39 49 357 47.632492 44.0763 48.4169 -101.834662 -103.8239 -100.1875
40 7 731 35.223555 33.1550 37.1899 -86.597405 -87.6872 -85.0250
41 51 158 37.752265 37.5649 39.3614 -97.464472 -100.8498 -96.8648
42 63 712 39.102153 38.7603 39.7874 -94.563581 -94.9493 -93.5572
43 10 734 41.878467 40.7750 42.9367 -72.672399 -74.0287 -71.9117
44 50 2451 27.858158 27.0525 29.0234 -82.531026 -82.8268 -81.8186
45 45 441 38.935525 37.6408 39.3069 -94.787060 -95.4650 -94.6260
46 38 440 39.497655 38.8295 39.6582 -119.801676 -120.0351 -119.2317
47 25 887 26.222205 25.3801 27.5916 -80.235295 -80.5047 -80.0566
48 24 1711 33.567803 32.8931 35.2270 -111.998285 -112.5707 -111.3066
49 62 1647 40.007466 38.7258 41.4840 -74.940790 -75.9959 -74.1990
50 29 4832 33.853643 31.7036 35.1986 -84.353385 -86.1009 -83.1686
51 21 1560 36.399064 35.6176 37.9719 -79.866392 -81.5113 -78.9306
52 32 1518 41.346071 39.9628 41.9866 -81.591791 -82.8181 -80.3126
53 60 114 43.026267 41.1351 44.3024 -77.187233 -78.9607 -75.2143
54 56 163 30.875948 30.1607 32.8418 -86.018401 -88.2373 -84.0890
55 58 3226 35.788215 34.9674 36.7099 -78.683957 -79.3885 -77.3486
56 33 1958 35.154421 33.9006 35.9083 -80.898734 -81.9645 -79.7341
57 11 2408 36.165366 34.4752 38.4533 -115.172531 -116.2805 -112.8266
58 68 922 29.528844 28.9673 30.1051 -98.548845 -99.1548 -97.9657
59 14 696 33.831681 31.7872 35.4961 -92.950421 -94.6692 -90.8945
60 42 1274 40.012806 38.2313 41.1211 -82.960595 -84.0782 -81.4792
61 2 1560 29.894512 28.9520 31.7067 -95.455782 -96.3777 -93.7581
62 30 442 35.265488 33.4345 37.0376 -89.893783 -91.2012 -88.2511
63 66 249 34.271618 33.6285 35.0960 -77.921776 -79.4567 -76.7601
64 46 1612 33.769279 33.4383 34.1383 -117.928792 -118.1973 -117.5596
65 67 848 38.840626 37.2068 40.7901 -85.968491 -88.1017 -85.3842
66 0 2459 36.958129 36.0217 37.8946 -76.325611 -76.7583 -75.5594
67 18 8597 39.006453 38.1175 40.9381 -77.003070 -78.5107 -76.1512
68 36 1332 32.874022 32.5601 33.3694 -117.084980 -117.3526 -114.5020
# Inspect the column names of the merged frame; the value is shown as the
# cell's output.
clustered_df.columns
Index(['unique_id', 'latitude', 'longitude', 'has_Tennis', 'has_Parking',
'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
'has_photo_no', 'has_photo_yes', 'pets_allowed_Yes', 'bathrooms',
'bedrooms', 'scaled_square_feet', 'week_1', 'week_2', 'week_3',
'week_4', 'price', 'prediction'],
dtype='object')
VIF scores and Linear Regressions by Cluster¶
We created several functions that calculate the VIF score for each cluster. This isn't trivial: constant columns have to be dropped first, and then the column with the highest VIF score is removed repeatedly until every remaining column has a VIF score under 5.
We also fit a linear regression for each cluster using the features selected by the VIF process, and we save all of the results into a DataFrame for viewing.
# Rename the k-means output column 'prediction' to 'clusters' for readability.
clustered_df = clustered_df.rename(columns={'prediction': 'clusters'})
# Show the column index to confirm the rename.
clustered_df.columns
Index(['unique_id', 'latitude', 'longitude', 'has_Tennis', 'has_Parking',
'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
'has_photo_no', 'has_photo_yes', 'pets_allowed_Yes', 'bathrooms',
'bedrooms', 'scaled_square_feet', 'week_1', 'week_2', 'week_3',
'week_4', 'price', 'clusters'],
dtype='object')
We had an issue where building the pandas DataFrame that holds the results did not work when using PySpark DataFrames; there was a conflict between the worker nodes and the driver.
import pyspark.pandas as ps
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Obtain the Spark session used by this section of the notebook.
spark = (
    SparkSession.builder
    .appName("VIF Calculation and Price Prediction")
    .getOrCreate()
)

# clustered_df is assumed to be a pandas-on-Spark DataFrame at this point.
# Columns with a special role in the per-cluster analysis:
cluster_col = 'clusters'      # grouping key
unique_id_col = 'unique_id'   # excluded from the feature list
price_col = 'price'           # regression target

# Every remaining column is treated as a candidate feature.
numeric_columns = [
    column_name
    for column_name in clustered_df.columns
    if column_name not in (unique_id_col, cluster_col, price_col)
]

# Switch to a plain Spark DataFrame so groupBy().applyInPandas() can be used.
spark_df = clustered_df.to_spark()
def remove_constant_columns(df):
    """Return ``df`` restricted to columns that hold at least two distinct values.

    ``DataFrame.nunique`` ignores missing values, so a column that is entirely
    NaN reports 0 distinct values and a constant column reports 1.  Both are
    dropped: neither carries information for a regression, and an all-NaN
    column would make the downstream model fits fail.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are candidate features.

    Returns
    -------
    pandas.DataFrame
        The same rows, with only the non-constant columns, in original order.
    """
    # "> 1" rather than "!= 1" so all-NaN columns (0 distinct values) go too.
    return df.loc[:, df.nunique() > 1]
def calculate_vif(pdf, feature_cols):
    """Compute the variance inflation factor (VIF) of each feature.

    Each feature is regressed on all the other features with an ordinary
    ``LinearRegression``; its VIF is ``1 / (1 - R^2)`` of that fit.

    Parameters
    ----------
    pdf : pandas.DataFrame
        Data containing at least the columns named in ``feature_cols``.
    feature_cols : list of str
        Names of the features to score.

    Returns
    -------
    pandas.DataFrame
        One row per feature with columns ``'feature'`` and ``'VIF'``.
        A perfectly collinear feature gets ``inf``.
    """
    vif_data = []
    for col_name in feature_cols:
        other_cols = [x for x in feature_cols if x != col_name]
        if not other_cols:
            # Nothing to be collinear with: R^2 is 0 by convention, so VIF is 1.
            vif_data.append((col_name, 1.0))
            continue
        y = pdf[col_name]
        X = pdf[other_cols]
        r2 = LinearRegression().fit(X, y).score(X, y)
        # With perfect collinearity R^2 is 1 (or marginally above 1 after
        # rounding).  Dividing would then either raise ZeroDivisionError or
        # yield a huge *negative* VIF that slips under any threshold, so report
        # an infinite VIF instead.
        vif = float('inf') if r2 >= 1 else 1 / (1 - r2)
        vif_data.append((col_name, vif))
    return pd.DataFrame(vif_data, columns=['feature', 'VIF'])
def iterative_vif(pdf, feature_cols, threshold=5):
    """Drop the highest-VIF feature repeatedly until all VIFs are below ``threshold``.

    On every pass the VIFs of the remaining features are recomputed with
    ``calculate_vif``; if the largest one is not below ``threshold`` that
    feature is removed.  The loop also stops once fewer than two features
    remain, because a VIF cannot be computed without other predictors.

    Parameters
    ----------
    pdf : pandas.DataFrame
        Data containing the feature columns.
    feature_cols : list of str
        Candidate feature names.  The list is not modified.
    threshold : float, default 5
        Features are removed while the maximum VIF is >= this value.

    Returns
    -------
    tuple (pandas.DataFrame, list of str)
        The last VIF table (columns ``'feature'`` and ``'VIF'``), restricted to
        the retained features, and the list of retained feature names.
    """
    # Work on a copy so the caller's list is not mutated as a side effect.
    remaining = list(feature_cols)
    while True:
        vif_df = calculate_vif(pdf, remaining)
        max_vif = vif_df['VIF'].max()
        if max_vif < threshold:
            break
        feature_to_remove = vif_df.loc[vif_df['VIF'].idxmax(), 'feature']
        remaining.remove(feature_to_remove)
        if len(remaining) < 2:
            # No recomputation is possible here, so at least make sure the
            # returned table no longer lists the feature that was just dropped.
            # (The surviving feature's VIF is from the last pass and is stale.)
            vif_df = vif_df[vif_df['feature'] != feature_to_remove].reset_index(drop=True)
            break
    return vif_df, remaining
def predict_price(pdf, feature_cols):
    """Fit a linear regression of the price column on ``feature_cols``.

    The model is fitted and evaluated on the same rows (in-sample metrics).

    Parameters
    ----------
    pdf : pandas.DataFrame
        Data containing ``feature_cols`` and the module-level ``price_col``.
    feature_cols : list of str
        Names of the predictor columns.

    Returns
    -------
    tuple
        ``(coef_df, intercept, mse, r2, adjusted_r2)`` where ``coef_df`` has
        columns ``'feature'`` and ``'coefficient'``.  ``adjusted_r2`` is NaN
        when there are too few rows for it to be defined.
    """
    X = pdf[feature_cols]
    y = pdf[price_col]
    model = LinearRegression().fit(X, y)
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    r2 = r2_score(y, predictions)
    n = len(y)
    p = len(feature_cols)
    # Adjusted R^2 needs n - p - 1 > 0 residual degrees of freedom.  A small
    # cluster would otherwise raise ZeroDivisionError (or produce a meaningless
    # value) and abort the whole grouped computation.
    dof = n - p - 1
    if dof > 0:
        adjusted_r2 = 1 - ((1 - r2) * (n - 1)) / dof
    else:
        adjusted_r2 = float('nan')
    coef_df = pd.DataFrame({'feature': feature_cols, 'coefficient': model.coef_})
    return coef_df, model.intercept_, mse, r2, adjusted_r2
# Schema of the frame returned by process_group: one row per retained feature,
# with the cluster-level fit metrics repeated on every row.  All fields are
# nullable.
result_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("feature", StringType()),
        ("VIF", DoubleType()),
        ("coefficient", DoubleType()),
        ("cluster", IntegerType()),
        ("intercept", DoubleType()),
        ("mse", DoubleType()),
        ("r2", DoubleType()),
        ("adjusted_r2", DoubleType()),
        ("row_count", IntegerType()),
    )
])
def process_group(pdf):
    """Run VIF feature selection and a price regression for one cluster.

    Intended as the function passed to ``groupBy(...).applyInPandas``:
    ``pdf`` is the pandas DataFrame holding all rows of a single cluster.

    Returns a DataFrame with one row per retained feature (its VIF and
    regression coefficient) plus the cluster id, intercept, MSE, R^2,
    adjusted R^2 and row count repeated on every row.  If fewer than two
    non-constant features are available, an empty frame with the same columns
    is returned.
    """
    cluster_id = pdf[cluster_col].iloc[0]
    n_rows = len(pdf)

    candidates = remove_constant_columns(pdf[numeric_columns])
    candidate_names = candidates.columns.tolist()
    if len(candidate_names) < 2:
        # Not enough features to compute a VIF; emit nothing for this cluster.
        return pd.DataFrame(columns=['feature', 'VIF', 'coefficient', 'cluster', 'intercept', 'mse', 'r2', 'adjusted_r2', 'row_count'])

    vif_table, kept_features = iterative_vif(candidates, candidate_names)
    coef_table, intercept, mse, r2, adjusted_r2 = predict_price(pdf, kept_features)

    # Keep only the VIF rows of surviving features and attach coefficients.
    kept_vif = vif_table[vif_table['feature'].isin(kept_features)]
    merged = kept_vif.merge(coef_table, on='feature', how='left')

    # Broadcast the cluster-level values onto every feature row.
    cluster_level = {
        'cluster': cluster_id,
        'intercept': intercept,
        'mse': mse,
        'r2': r2,
        'adjusted_r2': adjusted_r2,
        'row_count': n_rows,
    }
    for column_name, value in cluster_level.items():
        merged[column_name] = value
    return merged
# Run process_group once per cluster; Spark hands each cluster's rows to the
# function as a pandas DataFrame and stitches the outputs together.
results = (
    spark_df
    .groupBy(cluster_col)
    .applyInPandas(process_group, schema=result_schema)
)

# Bring the results to the driver as a pandas DataFrame for display.
results_pd = results.toPandas()

# Print one report per cluster, in order of first appearance.
print("\nResults for each cluster:")
for cluster in results_pd['cluster'].unique():
    cluster_results = results_pd.loc[results_pd['cluster'].eq(cluster)]
    print(f"\nCluster {cluster}:")
    print("Features, VIF, and Coefficients:")
    print(cluster_results.loc[:, ['feature', 'VIF', 'coefficient']])
    # Cluster-level metrics are repeated on every row, so read the first one.
    print(f"Intercept: {cluster_results['intercept'].iloc[0]}")
    print(f"Mean Squared Error: {cluster_results['mse'].iloc[0]}")
    print(f"R-squared: {cluster_results['r2'].iloc[0]}")
    print(f"Adjusted R-squared: {cluster_results['adjusted_r2'].iloc[0]}")
    print(f"Row count: {cluster_results['row_count'].iloc[0]}")

# Optional: Save to CSV
# results_pd.to_csv('vif_and_regression_results_by_cluster.csv', index=False)
Results for each cluster:
Cluster 1:
Features, VIF, and Coefficients:
feature VIF coefficient
0 latitude 1.136220 -247.743354
1 longitude 1.136633 -1149.969492
2 has_Tennis 1.164859 26.878715
3 has_Parking 1.372739 -17.928283
4 has_Alarm 1.025505 -439.268167
5 has_TV 1.143097 280.431279
6 has_Clubhouse 1.202413 -56.860778
7 has_Playground 1.146448 -128.586345
8 has_Refrigerator 1.665827 49.902413
9 has_Cable_or_Satellite 1.804974 -111.926752
10 has_Unknown 1.609796 26.719641
11 has_Gated 1.513861 -264.302005
12 has_Pool 1.555737 248.563655
13 has_Wood_Floors 1.169279 -142.578390
14 has_Internet_Access 1.568422 242.817799
15 has_View 1.069060 222.489605
16 has_Elevator 1.564472 351.235785
17 has_Hot_Tub 1.269900 -69.632048
18 has_Gym 1.543306 21.028659
19 has_Storage 1.106800 -34.589728
20 has_Doorman 1.026988 1778.738531
21 has_Dishwasher 1.941071 -168.951483
22 has_Washer_Dryer 1.152290 17.489561
23 has_Patio/Deck 1.288443 -7.775956
24 has_Garbage_Disposal 1.130511 -296.488063
25 has_Luxury 1.009518 636.488718
26 has_AC 1.657418 -168.877662
27 has_Fireplace 1.235341 -17.110677
28 has_photo_no 1.445649 63.012089
29 has_photo_yes 1.725025 -45.781483
30 pets_allowed_Yes 1.055916 298.006921
31 bathrooms 2.901135 776.553136
32 bedrooms 2.323574 -271.611015
33 scaled_square_feet 2.493659 986.087635
34 week_1 1.062504 -131.932388
35 week_2 1.295825 -708.919459
36 week_4 1.345707 -371.165253
Intercept: -127855.00168877741
Mean Squared Error: 1849043.8334700346
R-squared: 0.6105100713030106
Adjusted R-squared: 0.6072040085909691
Row count: 4397
Cluster 12:
Features, VIF, and Coefficients:
feature VIF coefficient
37 latitude 1.955843 -290.052256
38 longitude 2.092010 -139.525689
39 has_Tennis 1.205851 43.476088
40 has_Parking 1.616851 31.052135
41 has_Alarm 1.640565 27.613751
42 has_TV 1.581251 -32.496693
43 has_Clubhouse 1.857468 73.153362
44 has_Playground 1.506839 44.577856
45 has_Refrigerator 2.394159 -33.323752
46 has_Cable_or_Satellite 2.636075 -73.675993
47 has_Unknown 1.817353 120.887298
48 has_Gated 1.439450 -25.730953
49 has_Pool 1.572800 50.645735
50 has_Wood_Floors 1.151079 117.614863
51 has_Internet_Access 1.911567 -25.432871
52 has_View 1.120854 -14.514612
53 has_Elevator 1.160440 711.504754
54 has_Hot_Tub 1.822884 109.259779
55 has_Gym 1.970108 51.145623
56 has_Storage 1.842916 -5.886668
57 has_Dishwasher 2.599478 24.212583
58 has_Washer_Dryer 1.996224 107.759973
59 has_Patio/Deck 2.148608 -78.014403
60 has_Garbage_Disposal 1.827727 -36.551376
61 has_Luxury 1.114419 -208.309708
62 has_AC 1.974779 -89.554805
63 has_Fireplace 1.870510 -11.550000
64 has_photo_no 1.426984 38.180915
65 has_photo_yes 1.315826 11.031244
66 pets_allowed_Yes 1.162067 58.473299
67 bathrooms 2.541705 212.031868
68 bedrooms 2.675638 -79.790711
69 scaled_square_feet 3.394268 247.786612
70 week_1 1.107752 137.858566
71 week_2 1.097989 -320.335449
72 week_3 1.259794 100.147942
Intercept: -5068.981980932094
Mean Squared Error: 62606.5545276049
R-squared: 0.6074576652344965
Adjusted R-squared: 0.5876100190946676
Row count: 749
Cluster 22:
Features, VIF, and Coefficients:
feature VIF coefficient
73 latitude 2.746369 -64.685637
74 longitude 3.010876 -14.950513
75 has_Tennis 1.728615 -15.723408
76 has_Parking 1.694958 85.669375
77 has_TV 1.117862 -71.401956
78 has_Clubhouse 2.319333 9.360804
79 has_Playground 2.237522 -77.718158
80 has_Refrigerator 3.671827 -3.846969
81 has_Cable_or_Satellite 3.195047 21.139152
82 has_Unknown 1.832445 43.981893
83 has_Gated 1.647874 61.841669
84 has_Pool 2.291595 79.196003
85 has_Wood_Floors 1.496837 19.812707
86 has_Internet_Access 2.370342 2.358234
87 has_View 1.428881 -15.219875
88 has_Elevator 1.452879 272.564138
89 has_Hot_Tub 1.675968 186.118612
90 has_Gym 2.007903 1.110050
91 has_Storage 1.539032 7.999268
92 has_Dishwasher 3.282462 -61.868464
93 has_Washer_Dryer 2.002200 -13.891629
94 has_Patio/Deck 1.601004 -48.768162
95 has_Garbage_Disposal 2.184648 -123.367148
96 has_AC 2.305410 -12.359943
97 has_Fireplace 1.684242 39.075864
98 has_photo_no 1.286692 120.650314
99 has_photo_yes 2.975544 129.981921
100 pets_allowed_Yes 1.216509 115.876091
101 bathrooms 2.807367 42.441056
102 bedrooms 3.252056 24.070947
103 scaled_square_feet 3.131939 174.920828
104 week_1 1.166099 173.587776
105 week_3 1.306310 9.654138
106 week_4 2.934345 220.204099
Intercept: 1193.6600269323744
Mean Squared Error: 37715.09578971941
R-squared: 0.5788756843542795
Adjusted R-squared: 0.5536230798887514
Row count: 602
Cluster 26:
Features, VIF, and Coefficients:
feature VIF coefficient
107 latitude 3.088071 -2590.291689
108 longitude 3.767416 -4059.718771
109 has_Tennis 1.481238 686.097725
110 has_Parking 1.437362 -140.934793
111 has_Alarm 1.032300 -1454.094111
112 has_Golf 1.199589 1559.394386
113 has_TV 1.273073 430.674765
114 has_Clubhouse 1.435624 -60.057968
115 has_Playground 1.269781 -656.544585
116 has_Refrigerator 4.653471 72.435765
117 has_Cable_or_Satellite 1.517556 -635.922424
118 has_Unknown 1.555993 56.744970
119 has_Gated 1.432860 -333.804514
120 has_Pool 1.694521 509.007485
121 has_Wood_Floors 1.236230 -636.120235
122 has_Internet_Access 2.110323 29.185230
123 has_View 1.238710 287.926141
124 has_Elevator 2.463927 74.493645
125 has_Hot_Tub 1.125042 -592.500676
126 has_Gym 1.859398 138.434173
127 has_Storage 1.994534 -558.773836
128 has_Dishwasher 3.621599 -26.907471
129 has_Washer_Dryer 1.490012 -146.631208
130 has_Patio/Deck 1.964499 119.526444
131 has_Garbage_Disposal 1.093270 -802.984673
132 has_Luxury 1.067727 750.705291
133 has_AC 2.346342 35.577996
134 has_Fireplace 1.119332 5795.457881
135 has_photo_no 1.235973 -346.858132
136 has_photo_yes 1.324119 -338.920415
137 pets_allowed_Yes 1.392810 34.918839
138 bathrooms 3.321753 -53.975797
139 bedrooms 3.049911 -40.964581
140 scaled_square_feet 3.666878 825.642313
141 week_1 1.074234 652.750976
142 week_2 1.148029 -1439.437462
143 week_3 1.846748 -371.289173
Intercept: -263846.43893598486
Mean Squared Error: 2251678.5538327172
R-squared: 0.5093369571263023
Adjusted R-squared: 0.48101475340348354
Row count: 679
Cluster 27:
Features, VIF, and Coefficients:
feature VIF coefficient
144 latitude 1.434561 -18.337101
145 longitude 1.247024 31.541353
146 has_Tennis 1.463843 -13.994646
147 has_Parking 1.577460 57.106376
148 has_Golf 1.052277 -5.624343
149 has_TV 1.403699 -47.228300
150 has_Clubhouse 2.030043 33.666593
151 has_Playground 1.250824 -65.830676
152 has_Refrigerator 3.046823 21.402016
153 has_Cable_or_Satellite 2.133555 46.345236
154 has_Unknown 2.041357 68.925699
155 has_Gated 1.263355 188.265968
156 has_Pool 2.278023 -9.992079
157 has_Wood_Floors 1.238778 -35.906637
158 has_Internet_Access 1.562636 3.386001
159 has_View 1.120892 -143.918867
160 has_Elevator 1.511279 78.647699
161 has_Hot_Tub 1.320686 144.985027
162 has_Gym 1.944179 -24.140229
163 has_Storage 1.572828 14.920366
164 has_Dishwasher 2.941548 -35.006950
165 has_Washer_Dryer 1.694087 47.422258
166 has_Patio/Deck 1.762925 -6.175560
167 has_Garbage_Disposal 1.793853 -73.918466
168 has_Luxury 1.015439 59.459176
169 has_AC 1.579443 -91.858526
170 has_Fireplace 1.673575 15.885561
171 has_photo_no 1.103473 -1.090400
172 has_photo_yes 1.313647 20.604818
173 pets_allowed_Yes 1.142320 -1.107586
174 bathrooms 2.458125 137.730999
175 bedrooms 2.061867 -33.644569
176 scaled_square_feet 2.660010 207.588477
177 week_1 1.032163 -251.972031
178 week_2 1.325146 -15.453483
179 week_3 1.020268 132.144879
Intercept: 4044.5581003572042
Mean Squared Error: 26833.93966146942
R-squared: 0.6173742372465155
Adjusted R-squared: 0.6046553753267321
Row count: 1120
Cluster 28:
Features, VIF, and Coefficients:
feature VIF coefficient
180 latitude 1.425622 24.484437
181 longitude 1.282202 124.315036
182 has_Tennis 1.183380 -1.155128
183 has_Parking 1.725407 -37.553238
184 has_TV 1.903738 48.711510
185 has_Clubhouse 1.349101 107.577374
186 has_Playground 1.264631 -96.232067
187 has_Refrigerator 3.523577 -61.328494
188 has_Cable_or_Satellite 4.250913 -34.319347
189 has_Unknown 2.394420 -17.439574
190 has_Gated 3.295068 -4.290532
191 has_Pool 3.080604 -167.343480
192 has_Wood_Floors 1.146711 16.874702
193 has_Internet_Access 1.497079 69.243398
194 has_Elevator 3.176906 55.556203
195 has_Hot_Tub 3.939811 40.333528
196 has_Gym 1.993082 40.473537
197 has_Storage 1.716205 44.823308
198 has_Dishwasher 3.549254 -24.571060
199 has_Washer_Dryer 1.974629 72.465065
200 has_Patio/Deck 3.114274 -7.847409
201 has_Garbage_Disposal 1.427444 -28.976934
202 has_Luxury 1.066434 146.058467
203 has_AC 2.981800 -17.665002
204 has_Fireplace 1.138094 -66.272520
205 has_photo_no 1.408960 -101.897266
206 has_photo_yes 1.351421 -63.223590
207 pets_allowed_Yes 1.298637 91.294944
208 bathrooms 3.633205 159.960339
209 bedrooms 2.588370 18.215804
210 scaled_square_feet 4.171135 185.561875
211 week_2 1.202475 -145.302659
212 week_3 1.264658 -437.549692
Intercept: 11039.86353498449
Mean Squared Error: 23716.220990500115
R-squared: 0.8393843166510828
Adjusted R-squared: 0.8252877274209917
Row count: 410
Cluster 31:
Features, VIF, and Coefficients:
feature VIF coefficient
213 latitude 2.249864 368.020986
214 longitude 1.994283 -169.001600
215 has_Tennis 2.576161 -7.958800
216 has_Parking 2.943782 150.163005
217 has_Alarm 1.086131 -84.731674
218 has_TV 1.916234 -14.326476
219 has_Clubhouse 2.839885 -19.005652
220 has_Playground 1.692816 -64.938845
221 has_Refrigerator 2.322993 50.778589
222 has_Cable_or_Satellite 2.324485 13.637131
223 has_Unknown 3.934225 344.175038
224 has_Gated 2.457681 160.477971
225 has_Pool 4.945161 253.461813
226 has_Wood_Floors 2.710929 186.201573
227 has_Internet_Access 1.503650 -11.573857
228 has_Elevator 1.033379 -16.711637
229 has_Hot_Tub 1.759852 -297.664109
230 has_Gym 4.528346 -44.271711
231 has_Storage 3.028234 193.432546
232 has_Dishwasher 2.639886 30.997638
233 has_Washer_Dryer 2.221956 235.076917
234 has_Patio/Deck 1.987966 -65.699808
235 has_Garbage_Disposal 1.983527 -135.533095
236 has_Luxury 1.051454 463.344151
237 has_AC 1.547080 -184.501910
238 has_Fireplace 2.105521 -110.259339
239 has_photo_no 2.948581 76.879004
240 has_photo_yes 3.665159 41.194941
241 pets_allowed_Yes 1.413314 -1.249423
242 bathrooms 3.142061 -170.473769
243 bedrooms 3.919592 29.994665
244 scaled_square_feet 4.372655 257.346292
245 week_2 1.299492 -8.517294
246 week_4 2.534676 -47.734478
Intercept: -29325.66692899796
Mean Squared Error: 64063.53799535754
R-squared: 0.5097568224691791
Adjusted R-squared: 0.49181466096643645
Row count: 964
Cluster 34:
Features, VIF, and Coefficients:
feature VIF coefficient
247 latitude 1.152122 35.064193
248 longitude 1.366616 1257.968726
249 has_Tennis 1.821905 -61.223281
250 has_Parking 1.378773 27.438560
251 has_Alarm 1.166818 -77.778679
252 has_TV 1.587315 125.879702
253 has_Clubhouse 1.422935 -223.079381
254 has_Playground 1.314426 -115.753163
255 has_Refrigerator 2.041495 -157.346325
256 has_Cable_or_Satellite 2.059411 -153.683733
257 has_Unknown 1.685773 135.702883
258 has_Gated 1.183067 -313.273162
259 has_Pool 2.035635 1.346806
260 has_Wood_Floors 1.347410 -48.579155
261 has_Internet_Access 1.661090 107.568112
262 has_View 1.092364 323.677881
263 has_Elevator 1.509434 -75.358716
264 has_Hot_Tub 1.360583 4.585270
265 has_Gym 2.323241 237.121114
266 has_Storage 1.256634 -17.336234
267 has_Doorman 1.111054 822.118412
268 has_Dishwasher 2.277427 176.354908
269 has_Washer_Dryer 1.691046 -17.788446
270 has_Patio/Deck 1.276570 200.875856
271 has_Garbage_Disposal 1.440069 13.357643
272 has_Luxury 1.013006 235.957884
273 has_AC 1.666563 -76.260814
274 has_Fireplace 1.470333 148.222755
275 has_photo_no 1.588695 49.438454
276 has_photo_yes 1.666215 -24.564357
277 pets_allowed_Yes 1.063745 226.100371
278 bathrooms 1.840616 678.809654
279 bedrooms 1.709395 118.617813
280 scaled_square_feet 1.147502 55.977877
281 week_1 1.261274 787.484329
282 week_2 1.168254 -41.833901
283 week_4 1.165535 -67.326548
Intercept: 92559.24825598963
Mean Squared Error: 528643.6283043601
R-squared: 0.450272776159798
Adjusted R-squared: 0.4447769892692043
Row count: 3739
Cluster 44:
Features, VIF, and Coefficients:
feature VIF coefficient
284 latitude 1.201489 310.344190
285 longitude 1.208592 249.454993
286 has_Tennis 1.222968 74.492336
287 has_Parking 1.686367 -31.697034
288 has_Golf 1.030520 214.278831
289 has_TV 1.263832 -12.166824
290 has_Clubhouse 1.695784 57.333091
291 has_Playground 1.633750 -125.774369
292 has_Refrigerator 2.652774 217.137095
293 has_Cable_or_Satellite 2.337762 -138.414110
294 has_Unknown 2.071198 -54.211757
295 has_Gated 1.265729 -119.624941
296 has_Pool 1.740372 -175.779693
297 has_Wood_Floors 1.179533 -24.719450
298 has_Internet_Access 1.568236 26.295377
299 has_View 1.222926 188.648361
300 has_Elevator 1.139927 -115.024189
301 has_Hot_Tub 1.436480 -49.009597
302 has_Gym 1.840297 50.254818
303 has_Storage 1.496419 156.228641
304 has_Dishwasher 2.737684 -123.054841
305 has_Washer_Dryer 1.646259 -6.636130
306 has_Patio/Deck 1.935840 -13.171810
307 has_Garbage_Disposal 1.414632 -90.838754
308 has_Luxury 1.017971 -157.511953
309 has_AC 1.363665 530.067121
310 has_Fireplace 1.596622 -77.760333
311 has_photo_no 1.626369 103.904109
312 has_photo_yes 1.943934 10.036071
313 pets_allowed_Yes 1.274873 131.645676
314 bathrooms 2.472577 186.952026
315 bedrooms 2.571469 -94.291524
316 scaled_square_feet 3.002829 376.649398
317 week_1 1.047554 -230.806861
318 week_2 1.047663 -284.818582
319 week_4 2.007445 44.860292
Intercept: 16453.505092533036
Mean Squared Error: 150688.65937530802
R-squared: 0.5036166784693006
Adjusted R-squared: 0.4965395301227242
Row count: 2562
Cluster 47:
Features, VIF, and Coefficients:
feature VIF coefficient
320 latitude 1.173725 -87.225699
321 longitude 1.511964 67.603193
322 has_Tennis 1.694084 -67.797963
323 has_Parking 1.652125 116.437293
324 has_Alarm 1.054713 131.467717
325 has_TV 1.188287 346.805640
326 has_Clubhouse 2.045167 -222.523875
327 has_Playground 1.582571 88.770916
328 has_Refrigerator 2.029751 -96.460080
329 has_Cable_or_Satellite 1.815089 -92.387808
330 has_Unknown 2.046175 30.886440
331 has_Gated 1.644510 147.987510
332 has_Pool 1.965689 133.436188
333 has_Wood_Floors 1.335434 10.428920
334 has_Internet_Access 1.761018 96.365457
335 has_View 1.324641 325.643102
336 has_Elevator 1.489237 228.982610
337 has_Hot_Tub 1.324958 107.838628
338 has_Gym 1.833435 -6.480967
339 has_Storage 1.501372 48.356748
340 has_Doorman 1.089843 -154.602854
341 has_Dishwasher 2.046573 -50.550506
342 has_Washer_Dryer 1.455685 -48.532424
343 has_Patio/Deck 1.708304 -21.977611
344 has_Garbage_Disposal 2.144492 105.854561
345 has_AC 1.721562 -93.890229
346 has_Fireplace 1.224764 -99.693080
347 has_photo_no 1.189097 -139.766268
348 has_photo_yes 2.587584 63.037352
349 pets_allowed_Yes 1.279487 7.651626
350 bathrooms 2.839888 149.655941
351 bedrooms 2.583258 -19.761643
352 scaled_square_feet 3.379087 259.710399
353 week_3 1.307496 0.344846
Intercept: 9668.062412212703
Mean Squared Error: 85585.89904218251
R-squared: 0.6337659455703213
Adjusted R-squared: 0.6078782993112588
Row count: 516
Cluster 53:
Features, VIF, and Coefficients:
feature VIF coefficient
354 latitude 1.988396 -303.668476
355 longitude 1.785811 -610.592987
356 has_Tennis 2.491713 -88.706986
357 has_Parking 1.864728 -140.285203
358 has_TV 1.969075 -384.537969
359 has_Clubhouse 2.855335 108.990774
360 has_Playground 2.054019 -30.843364
361 has_Refrigerator 2.451989 -79.086514
362 has_Cable_or_Satellite 2.959612 221.480343
363 has_Unknown 2.047498 364.932766
364 has_Gated 1.979445 -114.542060
365 has_Pool 2.516879 6.770874
366 has_Wood_Floors 1.342344 -31.666811
367 has_Internet_Access 1.801831 -184.182748
368 has_Hot_Tub 1.794718 441.122052
369 has_Gym 2.154901 -34.070284
370 has_Storage 2.107997 209.432398
371 has_Dishwasher 3.518400 224.928337
372 has_Washer_Dryer 2.078196 -8.620650
373 has_Patio/Deck 2.058006 -136.772679
374 has_Garbage_Disposal 2.650698 -284.085952
375 has_AC 2.542053 -180.109964
376 has_Fireplace 1.471547 126.835507
377 has_photo_no 1.435952 -181.352802
378 has_photo_yes 4.152377 132.548000
379 pets_allowed_Yes 1.710351 -81.269633
380 bathrooms 2.759455 151.746953
381 bedrooms 2.656601 70.548665
382 week_3 2.046609 97.277592
383 week_4 3.124502 -224.486252
Intercept: -61167.46591885238
Mean Squared Error: 74242.4002257599
R-squared: 0.6938263919705878
Adjusted R-squared: 0.5939871719609968
Row count: 123
Cluster 65:
Features, VIF, and Coefficients:
feature VIF coefficient
384 latitude 2.250119 -1683.199581
385 longitude 2.185719 -2002.130850
386 has_Tennis 1.349149 151.867735
387 has_Parking 1.598988 10.777584
388 has_Alarm 1.124352 7376.139515
389 has_TV 2.005362 -118.129365
390 has_Clubhouse 1.562565 23.477113
391 has_Playground 1.456392 215.965049
392 has_Refrigerator 2.992003 195.176157
393 has_Cable_or_Satellite 1.680428 -70.390592
394 has_Unknown 2.203372 163.418644
395 has_Gated 1.280249 -149.245055
396 has_Pool 1.795889 -72.489192
397 has_Wood_Floors 1.196684 -272.050118
398 has_Internet_Access 1.711542 85.745903
399 has_View 1.088141 132.912074
400 has_Elevator 1.735218 241.397191
401 has_Hot_Tub 1.275935 -333.307705
402 has_Gym 1.888777 2.585058
403 has_Storage 1.455292 136.128962
404 has_Dishwasher 2.771766 108.861689
405 has_Washer_Dryer 1.719897 15.506317
406 has_Patio/Deck 1.670875 7.458357
407 has_Garbage_Disposal 1.377741 -249.112248
408 has_AC 1.684773 -96.833400
409 has_Fireplace 1.755189 -149.774451
410 has_photo_no 1.200381 -295.713235
411 has_photo_yes 2.285663 -93.118162
412 pets_allowed_Yes 1.557686 259.879881
413 bathrooms 3.256267 182.796803
414 bedrooms 2.599097 0.474360
415 scaled_square_feet 3.328509 581.533127
416 week_1 1.092685 -409.847543
417 week_3 1.088017 357.120561
Intercept: -180006.84738826434
Mean Squared Error: 592664.1799995424
R-squared: 0.5989894372154505
Adjusted R-squared: 0.58372139787091
Row count: 928
Cluster 3:
Features, VIF, and Coefficients:
feature VIF coefficient
418 latitude 2.306794 -185.993421
419 longitude 1.427660 -364.172196
420 has_Tennis 1.461539 -33.821252
421 has_Parking 1.640711 64.383381
422 has_Alarm 1.125003 107.323624
423 has_TV 1.337371 -61.435621
424 has_Clubhouse 1.683501 -52.094889
425 has_Playground 1.286114 -125.273184
426 has_Refrigerator 2.422976 77.505215
427 has_Cable_or_Satellite 2.021441 75.825175
428 has_Unknown 1.995962 46.619032
429 has_Gated 1.277878 120.101271
430 has_Pool 2
*** WARNING: max output size exceeded, skipping output. ***
8645 56.972420
1930 bathrooms 3.011470 82.448247
1931 bedrooms 3.279199 -46.349264
1932 scaled_square_feet 3.173206 199.718868
1933 week_1 1.007687 -141.724932
1934 week_2 1.072396 20.112800
1935 week_4 2.190593 -8.398322
Intercept: -172.67882695124513
Mean Squared Error: 38088.04746669037
R-squared: 0.4734549597793821
Adjusted R-squared: 0.4610087211398928
Row count: 1560
Cluster 30:
Features, VIF, and Coefficients:
feature VIF coefficient
1936 latitude 1.434548 -119.725832
1937 longitude 1.761341 -45.426298
1938 has_Tennis 1.865724 -79.817696
1939 has_Parking 1.700880 78.531074
1940 has_Alarm 1.354169 98.627273
1941 has_TV 1.718372 48.175926
1942 has_Clubhouse 2.096930 39.481177
1943 has_Playground 2.899749 -272.341227
1944 has_Refrigerator 4.628556 -68.908130
1945 has_Unknown 2.039074 -25.559077
1946 has_Gated 2.130883 -115.767498
1947 has_Pool 2.592135 -12.897698
1948 has_Wood_Floors 1.453908 -13.210499
1949 has_Internet_Access 2.177425 86.060680
1950 has_View 1.621438 215.024668
1951 has_Elevator 1.545671 181.025409
1952 has_Hot_Tub 1.208860 -339.199566
1953 has_Gym 2.972707 170.700642
1954 has_Storage 2.202129 142.952190
1955 has_Doorman 4.519825 814.833739
1956 has_Dishwasher 4.085272 143.477661
1957 has_Washer_Dryer 1.676665 -77.635153
1958 has_Patio/Deck 2.375977 -45.545854
1959 has_Garbage_Disposal 2.337646 -238.717300
1960 has_Luxury 1.051359 449.058538
1961 has_AC 2.157132 -114.413200
1962 has_Fireplace 1.782878 138.751078
1963 has_photo_no 1.300738 -145.215698
1964 has_photo_yes 1.994199 -75.707368
1965 pets_allowed_Yes 1.565147 59.757435
1966 bathrooms 3.298055 113.926688
1967 bedrooms 3.537323 -145.945105
1968 scaled_square_feet 3.254916 278.752334
1969 week_1 1.244012 -82.078739
1970 week_3 1.148181 -290.681247
1971 week_4 2.479113 -28.044157
Intercept: 531.8538397098578
Mean Squared Error: 42665.18494807305
R-squared: 0.7460699467049043
Adjusted R-squared: 0.7234983864120069
Row count: 442
Cluster 32:
Features, VIF, and Coefficients:
feature VIF coefficient
1972 latitude 1.339000 382.927082
1973 longitude 1.127576 5.484437
1974 has_Tennis 1.213700 -22.871709
1975 has_Parking 2.236835 48.415574
1976 has_Golf 1.114947 126.959837
1977 has_TV 1.702269 -112.351138
1978 has_Clubhouse 2.556595 67.422478
1979 has_Playground 1.584285 -93.889444
1980 has_Refrigerator 1.977698 -181.189102
1981 has_Cable_or_Satellite 2.956775 8.292049
1982 has_Unknown 2.295884 61.937302
1983 has_Gated 1.260178 104.458775
1984 has_Pool 1.994733 -202.327825
1985 has_Wood_Floors 1.269502 -225.838207
1986 has_Internet_Access 2.540800 -168.053146
1987 has_View 1.238982 271.953934
1988 has_Elevator 1.126334 -314.353936
1989 has_Hot_Tub 1.286818 22.262386
1990 has_Gym 1.701742 270.111426
1991 has_Storage 1.763043 223.119613
1992 has_Doorman 1.063766 94.237801
1993 has_Dishwasher 2.068223 84.871809
1994 has_Washer_Dryer 1.633566 -17.174185
1995 has_Patio/Deck 1.668328 -279.686605
1996 has_Garbage_Disposal 1.422668 335.853122
1997 has_Luxury 1.041415 174.818517
1998 has_AC 1.463632 -170.748884
1999 has_Fireplace 1.667752 17.808902
2000 has_photo_no 1.436973 170.171147
2001 has_photo_yes 1.474117 83.246918
2002 pets_allowed_Yes 1.202965 94.073830
2003 bathrooms 2.029350 499.765685
2004 bedrooms 1.852952 -147.056910
2005 scaled_square_feet 1.411079 133.927949
2006 week_2 1.179680 -52.238459
2007 week_4 1.217799 91.995805
Intercept: -15134.758273455582
Mean Squared Error: 181533.82197684297
R-squared: 0.48531669751266215
Adjusted R-squared: 0.4728058272293778
Row count: 1518
Cluster 33:
Features, VIF, and Coefficients:
feature VIF coefficient
2008 latitude 1.218155 46.638129
2009 longitude 1.130242 72.163760
2010 has_Tennis 1.310495 -47.914562
2011 has_Parking 1.586657 122.358706
2012 has_Alarm 1.730708 203.939909
2013 has_TV 1.453578 99.235270
2014 has_Clubhouse 1.672613 26.759635
2015 has_Playground 1.517128 -68.017787
2016 has_Refrigerator 2.261984 -65.204078
2017 has_Cable_or_Satellite 2.708779 -28.995376
2018 has_Unknown 1.839255 20.067400
2019 has_Gated 1.352524 36.698861
2020 has_Pool 1.869718 -10.653561
2021 has_Wood_Floors 1.081476 81.933412
2022 has_Internet_Access 2.341217 0.529009
2023 has_View 1.018347 43.744015
2024 has_Elevator 1.126737 207.376969
2025 has_Hot_Tub 1.113388 -23.998522
2026 has_Gym 1.873510 29.521275
2027 has_Storage 1.230639 25.752947
2028 has_Washer_Dryer 1.795317 -22.508226
2029 has_Patio/Deck 1.562762 -71.178821
2030 has_Garbage_Disposal 2.286279 6.793035
2031 has_Luxury 1.014664 -0.274277
2032 has_AC 2.709267 -84.374999
2033 has_Fireplace 1.620906 -19.515970
2034 has_photo_no 1.235139 125.566542
2035 has_photo_yes 2.753403 7.423620
2036 pets_allowed_Yes 1.027469 10.012207
2037 bathrooms 3.286275 90.047465
2038 bedrooms 4.341075 -35.102902
2039 scaled_square_feet 3.655762 186.707214
2040 week_1 2.481062 -93.406071
2041 week_2 2.581541 -140.206878
2042 week_3 1.292869 -176.340832
Intercept: 4876.42767092716
Mean Squared Error: 61121.11366852651
R-squared: 0.518020497193971
Adjusted R-squared: 0.5092435551553597
Row count: 1958
Cluster 36:
Features, VIF, and Coefficients:
feature VIF coefficient
2043 latitude 1.214092 -339.466340
2044 longitude 1.225386 -623.500724
2045 has_Tennis 1.213728 -85.389412
2046 has_Parking 1.475408 147.719157
2047 has_Alarm 1.087682 -50.415679
2048 has_TV 1.343998 10.059676
2049 has_Clubhouse 1.495789 24.129918
2050 has_Playground 1.210256 -162.914394
2051 has_Refrigerator 1.772236 -253.421600
2052 has_Cable_or_Satellite 1.801240 -81.064487
2053 has_Unknown 1.976963 181.223368
2054 has_Gated 1.239363 -62.198667
2055 has_Pool 1.900650 -45.134351
2056 has_Wood_Floors 1.114010 47.351885
2057 has_Internet_Access 1.461102 -36.674935
2058 has_View 1.080608 412.426908
2059 has_Elevator 1.211370 107.789522
2060 has_Hot_Tub 1.629382 48.811166
2061 has_Gym 1.752185 12.324874
2062 has_Storage 1.201905 -36.006885
2063 has_Dishwasher 1.957131 27.780426
2064 has_Washer_Dryer 1.506079 80.243971
2065 has_Patio/Deck 1.448740 7.002230
2066 has_Garbage_Disposal 1.229933 -52.563745
2067 has_Luxury 1.051873 278.178695
2068 has_AC 1.567322 -64.543497
2069 has_Fireplace 1.431528 -30.627746
2070 has_photo_no 1.187982 204.496758
2071 has_photo_yes 1.950547 110.375156
2072 pets_allowed_Yes 1.145488 149.715384
2073 bathrooms 2.920641 161.996648
2074 bedrooms 2.813122 -38.677423
2075 scaled_square_feet 3.090298 508.076729
2076 week_2 2.231954 -173.327902
2077 week_3 1.248974 179.142120
2078 week_4 3.152810 215.045093
Intercept: -61367.340321044416
Mean Squared Error: 226826.97421245003
R-squared: 0.6403385352002524
Adjusted R-squared: 0.6303402242096803
Row count: 1332
Cluster 42:
Features, VIF, and Coefficients:
feature VIF coefficient
2079 latitude 1.457918 -20.258362
2080 longitude 1.525291 -180.108841
2081 has_Tennis 1.326384 -66.157071
2082 has_Parking 1.405809 74.382883
2083 has_Alarm 1.253712 204.363912
2084 has_TV 1.106727 65.708483
2085 has_Clubhouse 1.642217 -53.235173
2086 has_Playground 1.407410 -74.774203
2087 has_Refrigerator 2.088436 -38.711318
2088 has_Cable_or_Satellite 1.624676 23.414526
2089 has_Unknown 1.694421 73.377212
2090 has_Gated 1.278913 25.873373
2091 has_Pool 2.232507 -22.638377
2092 has_Wood_Floors 1.298938 -3.895335
2093 has_Internet_Access 1.569960 17.791374
2094 has_View 1.379836 80.424282
2095 has_Elevator 1.215892 96.178942
2096 has_Hot_Tub 1.266222 -136.038511
2097 has_Gym 1.948828 35.353426
2098 has_Storage 1.266615 -0.375985
2099 has_Dishwasher 2.570369 -27.710983
2100 has_Washer_Dryer 1.662488 -23.266366
2101 has_Patio/Deck 1.462317 81.203264
2102 has_Garbage_Disposal 1.284778 129.664837
2103 has_AC 1.897240 -49.768315
2104 has_Fireplace 1.253740 91.652271
2105 has_photo_no 1.198865 20.487481
2106 has_photo_yes 1.272205 0.377905
2107 pets_allowed_Yes 1.272911 -196.226748
2108 bathrooms 2.304167 106.219177
2109 bedrooms 2.343638 -11.526678
2110 scaled_square_feet 2.695699 246.209419
2111 week_1 1.187871 120.794066
2112 week_2 1.083123 113.018405
2113 week_3 1.344681 -294.254762
Intercept: -13653.899972260646
Mean Squared Error: 69144.51026126942
R-squared: 0.5862816333736157
Adjusted R-squared: 0.5745852336709312
Row count: 1274
Cluster 46:
Features, VIF, and Coefficients:
feature VIF coefficient
2114 latitude 1.875284 -911.373570
2115 longitude 2.601867 -415.074850
2116 has_Tennis 1.339433 33.488034
2117 has_Parking 1.504466 100.307039
2118 has_Alarm 1.173955 176.716016
2119 has_TV 1.174047 185.907836
2120 has_Clubhouse 1.395971 -57.255588
2121 has_Playground 1.343494 -70.325367
2122 has_Refrigerator 1.437462 38.486647
2123 has_Cable_or_Satellite 1.526662 -66.246308
2124 has_Unknown 2.010487 76.214066
2125 has_Gated 1.309189 -72.380834
2126 has_Pool 1.714395 41.939706
2127 has_Wood_Floors 1.223167 4.064756
2128 has_Internet_Access 1.556862 138.708130
2129 has_View 1.085598 117.524758
2130 has_Elevator 1.279256 84.941620
2131 has_Hot_Tub 1.677418 -180.012837
2132 has_Gym 1.753432 37.093332
2133 has_Storage 1.261044 100.493986
2134 has_Dishwasher 1.704314 -44.476601
2135 has_Washer_Dryer 1.278378 55.556082
2136 has_Patio/Deck 1.604458 48.512500
2137 has_Garbage_Disposal 1.084447 -132.334307
2138 has_Luxury 1.020276 29.154011
2139 has_AC 1.505663 -52.444117
2140 has_Fireplace 1.280096 -57.924276
2141 has_photo_no 1.287154 -26.721409
2142 has_photo_yes 1.999875 -36.047594
2143 pets_allowed_Yes 1.150624 188.721565
2144 bathrooms 3.234787 83.644608
2145 bedrooms 2.751781 23.598758
2146 scaled_square_feet 3.474861 510.576341
2147 week_1 2.904790 67.404144
2148 week_2 1.764474 56.973074
2149 week_4 3.740246 168.268367
Intercept: -17673.73363666568
Mean Squared Error: 139154.96367493764
R-squared: 0.7423420510247722
Adjusted R-squared: 0.7364527264767671
Row count: 1612
Cluster 56:
Features, VIF, and Coefficients:
feature VIF coefficient
2150 latitude 2.170028 -100.344766
2151 longitude 2.117584 12.615448
2152 has_Tennis 1.459074 82.020004
2153 has_Parking 1.661409 -27.101088
2154 has_TV 3.566842 47.118351
2155 has_Clubhouse 1.646532 -14.391388
2156 has_Playground 1.936110 -243.922534
2157 has_Refrigerator 2.667911 -27.102534
2158 has_Cable_or_Satellite 2.816522 6.206294
2159 has_Unknown 2.510651 138.223435
2160 has_Gated 1.911457 -26.082147
2161 has_Pool 1.997424 66.041855
2162 has_Wood_Floors 1.384205 -186.540307
2163 has_Internet_Access 1.864278 -8.043419
2164 has_View 1.695226 9.362647
2165 has_Elevator 1.432498 -34.011724
2166 has_Hot_Tub 1.658091 -63.152912
2167 has_Gym 2.006888 51.372737
2168 has_Storage 2.173580 24.476048
2169 has_Washer_Dryer 2.154768 1.400483
2170 has_Patio/Deck 1.876410 10.653189
2171 has_Garbage_Disposal 2.499278 -70.705429
2172 has_Luxury 1.089022 8.340499
2173 has_AC 1.994308 -46.356439
2174 has_Fireplace 1.757130 53.306315
2175 has_photo_no 1.498413 -183.427457
2176 pets_allowed_Yes 1.577083 50.287594
2177 bathrooms 2.725094 130.244539
2178 scaled_square_feet 2.642961 153.419147
2179 week_2 2.692326 -74.824078
2180 week_3 2.019251 -51.861525
2181 week_4 2.855917 14.876479
Intercept: 4477.672591450521
Mean Squared Error: 49859.67990702523
R-squared: 0.5711007299589995
Adjusted R-squared: 0.4655255250258301
Row count: 163
Cluster 58:
Features, VIF, and Coefficients:
feature VIF coefficient
2182 latitude 1.421524 218.258813
2183 longitude 1.185976 -129.897425
2184 has_Tennis 1.285034 -78.741518
2185 has_Parking 1.454515 38.514787
2186 has_Alarm 1.315591 30.456136
2187 has_TV 1.172729 25.608230
2188 has_Clubhouse 1.611258 -40.897408
2189 has_Playground 1.498749 -55.171307
2190 has_Refrigerator 2.931232 29.632254
2191 has_Cable_or_Satellite 1.989209 -36.464235
2192 has_Unknown 1.723190 165.403510
2193 has_Gated 1.366783 79.276640
2194 has_Pool 1.867676 99.273047
2195 has_Wood_Floors 1.109303 42.659168
2196 has_Internet_Access 1.470281 -34.497868
2197 has_Elevator 1.268488 210.772445
2198 has_Hot_Tub 1.199878 -75.294908
2199 has_Gym 1.853778 1.576868
2200 has_Storage 1.397158 -9.369576
2201 has_Doorman 1.088001 136.955221
2202 has_Dishwasher 2.918221 -14.541034
2203 has_Washer_Dryer 1.575451 -61.855681
2204 has_Patio/Deck 1.566517 -5.095606
2205 has_Garbage_Disposal 2.007047 116.636134
2206 has_Luxury 1.021769 176.441407
2207 has_AC 1.968898 -66.277140
2208 has_Fireplace 1.431181 1.222987
2209 has_photo_no 1.264585 62.229652
2210 has_photo_yes 1.368267 36.836267
2211 pets_allowed_Yes 1.107388 263.676441
2212 bathrooms 3.032987 138.483815
2213 bedrooms 3.191892 -26.681383
2214 scaled_square_feet 3.102206 170.543747
2215 week_1 1.082545 39.998342
2216 week_2 1.208090 -5.651413
2217 week_4 1.551812 -63.026142
Intercept: -17796.755570972648
Mean Squared Error: 75324.15948242611
R-squared: 0.39623623909605477
Adjusted R-squared: 0.3894204675712689
Row count: 3226
Cluster 60:
Features, VIF, and Coefficients:
feature VIF coefficient
2218 latitude 2.123009 -115.781409
2219 longitude 2.100082 -16.119602
2220 has_Tennis 2.253828 -177.406174
2221 has_Parking 2.752087 -45.563338
2222 has_TV 3.895313 272.934468
2223 has_Clubhouse 1.799769 202.887031
2224 has_Playground 2.079895 -280.350030
2225 has_Refrigerator 3.056784 -18.297041
2226 has_Cable_or_Satellite 3.465682 -213.695628
2227 has_Unknown 2.531149 98.589712
2228 has_Gated 2.042325 434.855325
2229 has_Pool 3.825845 -119.647004
2230 has_Wood_Floors 1.777726 -18.674486
2231 has_Internet_Access 3.819287 336.127270
2232 has_View 1.743311 -305.357724
2233 has_Elevator 1.980473 218.060947
2234 has_Gym 4.963606 518.368322
2235 has_Storage 2.488823 87.709396
2236 has_Dishwasher 2.798890 56.055390
2237 has_Washer_Dryer 2.552049 -22.001300
2238 has_Patio/Deck 2.359162 -134.112805
2239 has_Garbage_Disposal 1.867332 -6.170532
2240 has_AC 2.937379 -110.055722
2241 has_Fireplace 1.663258 52.604120
2242 pets_allowed_Yes 2.132433 -29.710472
2243 bathrooms 2.507183 265.254484
2244 bedrooms 3.421376 -82.106029
2245 scaled_square_feet 3.533567 192.024207
2246 week_3 2.545025 -51.364393
2247 week_4 2.799710 -41.664168
Intercept: 4225.568148349634
Mean Squared Error: 68501.1109531393
R-squared: 0.6892352596100243
Adjusted R-squared: 0.5769106546497922
Row count: 114
Cluster 66:
Features, VIF, and Coefficients:
feature VIF coefficient
2248 latitude 3.409998 -421.607881
2249 has_Tennis 1.846251 8.821250
2250 has_Parking 2.436991 4.676629
2251 has_Alarm 1.498140 175.874248
2252 has_TV 1.696247 -62.137384
2253 has_Clubhouse 2.117571 -72.644932
2254 has_Playground 2.667748 -49.129911
2255 has_Refrigerator 1.774147 327.997043
2256 has_Cable_or_Satellite 2.153676 -118.811057
2257 has_Unknown 1.685202 377.455968
2258 has_Gated 1.349734 273.248556
2259 has_Pool 3.717100 5.222778
2260 has_Wood_Floors 1.343271 177.465321
2261 has_Internet_Access 2.467205 -12.459440
2262 has_View 1.433024 85.722113
2263 has_Elevator 1.408123 157.979621
2264 has_Gym 3.048268 145.643237
2265 has_Storage 2.340917 -0.895743
2266 has_Dishwasher 2.391963 -251.112716
2267 has_Washer_Dryer 1.776720 -72.858125
2268 has_Patio/Deck 1.894858 168.984759
2269 has_Garbage_Disposal 3.357753 6.634720
2270 has_AC 2.979578 -105.251496
2271 has_Fireplace 2.443905 -57.352369
2272 has_photo_no 1.221906 259.146236
2273 has_photo_yes 1.634173 -31.125308
2274 pets_allowed_Yes 1.233293 354.643471
2275 bathrooms 3.649868 368.093184
2276 bedrooms 3.252507 23.552735
2277 week_2 4.646306 -252.353027
2278 week_3 1.902023 706.908943
2279 week_4 3.085498 22.233817
Intercept: 14489.843942944506
Mean Squared Error: 44403.39239160985
R-squared: 0.7240033256743393
Adjusted R-squared: 0.6831149294779452
Row count: 249
Cluster 67:
Features, VIF, and Coefficients:
feature VIF coefficient
2280 latitude 2.067224 62.160826
2281 longitude 2.226709 -81.597665
2282 has_Tennis 1.382601 6.724395
2283 has_Parking 1.512593 86.576961
2284 has_Alarm 1.186014 -262.051074
2285 has_Golf 1.057580 247.587733
2286 has_TV 1.363565 104.802035
2287 has_Clubhouse 1.992063 120.663923
2288 has_Playground 1.611179 -128.639221
2289 has_Refrigerator 3.490503 -253.809052
2290 has_Cable_or_Satellite 2.127730 22.536840
2291 has_Unknown 1.822998 96.490416
2292 has_Gated 1.252106 -93.908765
2293 has_Pool 2.321319 -24.411215
2294 has_Wood_Floors 1.508811 50.377360
2295 has_Internet_Access 1.889853 -0.870209
2296 has_View 1.748370 -72.297914
2297 has_Elevator 1.234836 230.322871
2298 has_Hot_Tub 1.109766 91.350671
2299 has_Gym 2.141972 22.740849
2300 has_Storage 1.420197 29.779317
2301 has_Doorman 1.716865 -90.572396
2302 has_Dishwasher 3.145248 12.687062
2303 has_Washer_Dryer 1.517581 -13.131889
2304 has_Patio/Deck 1.756549 -52.154594
2305 has_Garbage_Disposal 2.002010 60.527475
2306 has_Luxury 1.021128 56.129693
2307 has_AC 1.519422 -1.028575
2308 has_Fireplace 1.738392 -109.222052
2309 has_photo_no 1.169499 300.463899
2310 has_photo_yes 2.029887 51.987184
2311 pets_allowed_Yes 1.381728 -232.292373
2312 bathrooms 2.602915 297.036227
2313 bedrooms 2.606112 48.756397
2314 scaled_square_feet 2.809306 156.389040
2315 week_1 1.209775 82.907275
2316 week_2 3.107711 -250.940979
2317 week_3 1.104171 432.438139
Intercept: -9060.405647152316
Mean Squared Error: 117856.24199536812
R-squared: 0.5790327827483543
Adjusted R-squared: 0.5592592917031596
Row count: 848
Cluster 68:
Features, VIF, and Coefficients:
feature VIF coefficient
2318 latitude 1.875840 490.302215
2319 longitude 1.720388 -124.443444
2320 has_Tennis 1.438158 -106.139867
2321 has_Parking 1.507178 44.710132
2322 has_TV 1.489304 93.191153
2323 has_Clubhouse 1.548149 -67.775147
2324 has_Playground 1.253032 -83.305832
2325 has_Refrigerator 2.463793 -60.584823
2326 has_Cable_or_Satellite 2.486560 69.507581
2327 has_Unknown 1.862351 -14.065256
2328 has_Gated 1.605061 21.739782
2329 has_Pool 2.220967 57.665546
2330 has_Wood_Floors 1.321396 -2.731416
2331 has_Internet_Access 1.842383 -10.828681
2332 has_Elevator 1.084526 268.663465
2333 has_Hot_Tub 1.366308 -25.098207
2334 has_Gym 2.020393 36.864115
2335 has_Storage 1.532423 -5.082540
2336 has_Dishwasher 3.181736 -52.622451
2337 has_Washer_Dryer 1.844509 36.961804
2338 has_Patio/Deck 1.962687 -32.942442
2339 has_Garbage_Disposal 1.689271 -139.072038
2340 has_Luxury 1.024418 -202.730014
2341 has_AC 2.677739 -99.920282
2342 has_Fireplace 1.524547 -35.187990
2343 has_photo_no 1.175013 -726.969527
2344 pets_allowed_Yes 2.865601 -43.225076
2345 bathrooms 3.805906 139.480935
2346 bedrooms 4.809382 -20.839717
2347 scaled_square_feet 4.339836 197.901698
2348 week_1 1.047782 53.770578
2349 week_3 1.281547 65.490925
2350 week_4 3.513481 71.518711
Intercept: -26254.838745430534
Mean Squared Error: 41348.97963689274
R-squared: 0.6466615990846267
Adjusted R-squared: 0.6335307801316905
Row count: 922
# Build a cluster-by-feature table of VIF values from the per-cluster results.
# Expects `results_pd` (a pandas DataFrame created in an earlier cell) to have
# at least the columns 'cluster', 'feature' and 'VIF', with one row per
# (cluster, feature) pair -- `pivot` raises ValueError on duplicate pairs.
vif_results = results_pd[['cluster', 'feature', 'VIF']]

# Pivot so that rows are clusters and columns are features, then order the
# feature columns alphabetically. A feature that has no row for a cluster
# shows up as NaN here.
vif_numeric = vif_results.pivot(index='cluster', columns='feature', values='VIF')
vif_numeric = vif_numeric.reindex(sorted(vif_numeric.columns), axis=1)

# Same table as before with missing entries labelled 'Constant'. Kept under
# the original name so later cells (and the optional CSV export below) see the
# same object they always did.
vif_pivot = vif_numeric.fillna('Constant')

# Display-only copy. Two problems with passing `vif_pivot` straight to
# display():
#   1. Its columns mix floats with the string 'Constant', so the Arrow-based
#      pandas -> Spark conversion fails ("Could not convert 'Constant' with
#      type str: tried to convert to double") and falls back to the slow path.
#   2. The 'cluster' index is dropped in that conversion, so the rendered rows
#      cannot be matched back to a cluster.
# Casting every VIF cell to str gives uniformly typed columns, and
# reset_index() turns 'cluster' into a regular, visible column.
vif_display = (
    vif_numeric.astype(str)
    .where(vif_numeric.notna(), 'Constant')
    .reset_index()
)
vif_display.columns.name = None  # drop the leftover 'feature' axis label

# Print the results
print("\nVIF values for each feature by cluster:")
display(vif_display)

# Optional: Save to CSV
# vif_pivot.to_csv('vif_results_by_cluster.csv')
print("\nNote: 'Constant' indicates that the feature was constant within that cluster.")
VIF values for each feature by cluster:
/databricks/spark/python/pyspark/sql/pandas/conversion.py:413: UserWarning: createDataFrame attempted Arrow optimization because 'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, failed by the reason below: Could not convert 'Constant' with type str: tried to convert to double Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true. warn(msg)
| bathrooms | bedrooms | has_AC | has_Alarm | has_Cable_or_Satellite | has_Clubhouse | has_Dishwasher | has_Doorman | has_Elevator | has_Fireplace | has_Garbage_Disposal | has_Gated | has_Golf | has_Gym | has_Hot_Tub | has_Internet_Access | has_Luxury | has_Parking | has_Patio/Deck | has_Playground | has_Pool | has_Refrigerator | has_Storage | has_TV | has_Tennis | has_Unknown | has_View | has_Washer_Dryer | has_Wood_Floors | has_photo_no | has_photo_yes | latitude | longitude | pets_allowed_Yes | scaled_square_feet | week_1 | week_2 | week_3 | week_4 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2.4313366469111446 | 2.38908059149533 | 1.5783112544252038 | 1.3675680513779922 | 2.1409330091799403 | 1.5758662718925405 | 2.5286587342995803 | Constant | 1.3785178307740282 | 1.4234438270366876 | 1.551855981367239 | 1.1870914690336536 | Constant | 1.9280968340448836 | 1.1343034913231496 | 1.909597179075757 | 1.0082173126405871 | 1.447800084764559 | 1.5760612968687036 | 1.697917242608278 | 1.9799157328126682 | 1.7245756061901039 | 1.4353857117171984 | 1.719822345666638 | 1.3873075297868265 | 1.5199018865539182 | 1.1873804466211637 | 1.6053475285319163 | 1.182579244330641 | 1.1529739540435022 | 1.2461857667536582 | 1.5861296658582458 | 1.5518041332339716 | 1.0513697434664815 | 2.794695940274564 | 1.0315284989305222 | 1.0252404003413278 | Constant | 1.2193279673108346 |
| 2.901135167990877 | 2.3235739799030473 | 1.6574184415142563 | 1.0255047419095686 | 1.8049736709365445 | 1.2024125233993486 | 1.9410705162188633 | 1.026988450122588 | 1.5644717102636794 | 1.2353406871134645 | 1.1305105788496186 | 1.5138614412617817 | Constant | 1.5433060776614251 | 1.2699000860421927 | 1.5684224985176207 | 1.0095177893266927 | 1.3727390389488563 | 1.2884426181212996 | 1.1464479156797636 | 1.5557370505725343 | 1.6658266404657909 | 1.106799905653073 | 1.143096902539268 | 1.1648594772869822 | 1.6097962877668683 | 1.0690597996655222 | 1.1522902893697797 | 1.1692794752680078 | 1.4456488391716165 | 1.7250251838755504 | 1.136219710849327 | 1.1366327004006547 | 1.0559162807426818 | 2.4936592174552494 | 1.0625038472890709 | 1.2958253472865175 | Constant | 1.34570662714226 |
| 3.1613433939527935 | 3.7904670607744992 | 1.6050503304766364 | 1.0251071785840935 | 1.6943723188740996 | 1.2232508830641333 | 4.192245190310863 | Constant | 1.1154444241989858 | 1.3121328656157012 | 1.8743508838102967 | 1.130961306609217 | 1.0298200001969777 | 1.612920437464191 | 1.0615435665073059 | 1.489116959440945 | Constant | 1.311581104809747 | 1.5309656593711065 | 1.1531644119694064 | 1.4441732840424828 | 3.3885745899086133 | 1.2755862107126716 | 1.1222856786663244 | 1.1735714936396884 | 1.4330262931901374 | 1.0195880362760248 | 1.5340990147220055 | 1.0748168944657526 | 1.1058116899324468 | Constant | 1.4040350081368147 | 1.3060686858955133 | 1.139499431172358 | 3.267460709409809 | Constant | 1.3187360024154868 | 1.3931682820258968 | 1.973192352823795 |
| 2.2064670074330346 | 2.3394317824896196 | 1.5098481967979938 | 1.1250026104243382 | 2.021440879037626 | 1.6835005244593617 | 2.453484150258959 | Constant | 1.0820954498555768 | 1.5299368790437915 | 1.2868141623942329 | 1.2778777989376222 | Constant | 2.19467970535803 | 1.1187773160577144 | 1.6700654287546686 | 1.0239998812377127 | 1.6407112246282558 | 1.3961065888340254 | 1.2861143402212831 | 2.16663476880811 | 2.422975636295253 | 1.3750108771808087 | 1.337370887555502 | 1.461539335214416 | 1.9959623223181138 | 1.0252542472322228 | 1.4673935165291967 | 1.1749309976168287 | 1.3537827211652333 | 1.6057256141705822 | 2.3067935914958695 | 1.427659554111605 | 1.1340517482916548 | 2.888789473958262 | 1.010955232377544 | 1.04083212885838 | Constant | 2.569481524622883 |
| 2.9380721385921604 | 3.5715526094634757 | 2.119034908935995 | 1.1538404706281173 | 1.9646062358641778 | 1.7642785539902188 | 2.870404418273747 | 1.1210185572818498 | 1.0782337793637746 | 1.5355955705061706 | 1.429630711205407 | 1.6241701061981253 | Constant | 2.1630435679657753 | 1.229423443858726 | 1.6133966022345114 | 1.0262458352764074 | 1.4819698195775546 | 1.40732342998891 | 1.7931668370571 | 1.7750554823064435 | 2.700127869677813 | 1.4259779126348209 | 1.4783818680832879 | 1.56195969400404 | 1.6380384542659112 | 1.0714954740144507 | 1.7601624562648852 | 1.5471473515505534 | 1.4794022952656023 | 2.0928735180807103 | 1.870003291736483 | 2.349385486420554 | 1.2075641065386402 | 3.677211708245454 | Constant | 1.1212324598369825 | 2.8353374572983476 | 3.6509224225245474 |
| 2.7605809758140945 | 2.2266327777535633 | 1.4616812493190459 | 1.0361109266159465 | 1.5519493687721269 | 1.4424556119404015 | 2.374517774201184 | 1.0243049996954292 | 1.0811335194938452 | 1.5400520454551339 | 1.1910235804233955 | 1.0948708408245156 | 1.011155790856307 | 2.030797722768261 | 1.3742159426990297 | 1.263175744674106 | 1.015472206134162 | 1.4054650081863362 | 1.238531316098287 | 1.206804148332772 | 1.6713851597811569 | 2.2442247338660763 | 1.1924283709875731 | 1.1518317303232453 | 1.1069028975563793 | 1.6843793639097344 | 1.1242784934801584 | 1.4468576989282087 | 1.0668878626994274 | 1.3645016453866226 | 1.485818822097729 | 1.1022353280128736 | 1.0833916414425266 | 1.0573027784170281 | 2.6455835554669873 | 1.0208188915155154 | 1.0443668669744837 | Constant | 1.2178110725017453 |
| 3.5921307540238945 | 3.3261163704366226 | 1.9187184873055756 | Constant | 4.216483538500998 | 3.2361450608830618 | 4.9529156773674385 | Constant | 3.084560871689006 | 1.8175405945702598 | 3.362972822087096 | 2.0224374878153055 | 1.2410637443126777 | 2.6998009987716163 | 2.3851786218601667 | 4.097602264873229 | Constant | 2.875755250674238 | 2.7691625301792273 | 1.6896535066067873 | 3.581244361860038 | 4.498582497265586 | 2.3585085993406514 | 1.1814197799631014 | 1.0812441782789586 | Constant | 1.5104868761234742 | 2.0658206741023526 | 1.3346561033899795 | 1.1435775333324592 | 2.605512519929193 | 1.473285088687171 | 1.2886025958092377 | 1.2017939564836522 | 4.726370572631432 | Constant | 1.108221463897846 | 1.2320210853719575 | Constant |
| 2.9317605292328426 | 3.6327151693752873 | 1.7106375355498147 | 1.1522668874201947 | 2.0227426040631236 | 1.5726799552429298 | 3.2038214322436915 | Constant | 1.3247465259435074 | 1.6604136997454617 | 1.4341260282077641 | 1.3765079351890108 | Constant | 2.0872541608404203 | 1.1963170776270178 | 1.6802712884902746 | Constant | 1.5553139426610256 | 1.5045408820623198 | 1.3256795336885945 | 1.7693814238722387 | 3.0705861812896846 | 1.508523761190798 | 1.3261809987486919 | 1.464914709131908 | 2.007430860840051 | 1.0635253314737099 | 1.7196078591421171 | 1.290291976153571 | 1.1018628211446773 | Constant | 1.1724051945958107 | 1.1480160465188227 | 1.196927530421433 | 3.2110206833564563 | 1.106098250379496 | Constant | 1.0643474890458993 | 1.6529288049239312 |
| 2.1701519266152034 | 1.7996509842854336 | 1.4506444470720181 | 1.129997380196486 | 2.4390690278079825 | 1.4276818585662217 | 2.6698062039919006 | Constant | 1.593037051502491 | 1.2323138437892376 | 2.081160274748997 | 1.172499040416016 | 1.1859549356149044 | 1.7242388676421583 | 1.2037231077656707 | 1.9731473758878992 | 1.0159149219692223 | 1.7527078814246637 | 1.7136328389518447 | 1.232642926290537 | 1.7478272374964734 | 3.565724385527301 | 1.2944819386356545 | 1.1839187397633362 | 1.1850371582196682 | 2.597291260731554 | 1.224732795535469 | 1.2740013304369509 | 1.2198207801285545 | 1.0556595176013643 | Constant | 1.1429031611060867 | 1.1281909901220115 | 1.3917958324735524 | 2.2752656559078255 | 1.2319267485900156 | Constant | 1.71603351611457 | 2.6195894022600448 |
| 3.168214077179189 | 3.8794235273223965 | 3.6511474700604887 | Constant | Constant | 2.3486553720401857 | Constant | Constant | 1.3257456683393853 | 2.33168583357705 | 3.1997852912032787 | 2.430540852671346 | Constant | Constant | 2.061766756937628 | 3.905078710494897 | Constant | 4.676781184844802 | 3.764277383425689 | 1.5470959505882775 | Constant | Constant | 2.544627539442361 | Constant | 3.8945050760498368 | 2.928775268745342 | Constant | 2.463104531424506 | 4.967435665457515 | Constant | Constant | 2.338926324140215 | 1.7876233696962747 | 1.6543720952705445 | Constant | 1.7406725588884557 | 1.2884488969613959 | 1.7467798754442847 | Constant |
| 1.8403860557244354 | 1.687284561080991 | 1.811791026658677 | Constant | 1.8229164023933435 | 3.3190367353478027 | 1.791016134098298 | 1.1107792503667648 | 1.2796257788077434 | 2.671521836022399 | 1.4033047318327985 | 1.078337975086483 | Constant | 3.3568225516050445 | 1.1350027779402045 | 1.7604524275873659 | 1.019028195169648 | 1.5306537627566832 | 1.4618908966983972 | 2.148218534756493 | 3.231151803234838 | 2.0687519246971675 | 1.6842278049636643 | 2.8721041517025623 | 3.518951743053285 | 2.11031332694114 | 1.1861030384412032 | 1.4985545927585269 | 1.3326984447098371 | 1.4149053270303995 | 1.56184845895791 | 1.9104082206097261 | 2.6511653020604906 | 1.1753935416938488 | 2.3096326587838227 | 1.147578911078637 | 2.1068575135344747 | Constant | 1.7756493200721444 |
| 3.2913554252295913 | 3.3841495508402466 | 2.5235306945010243 | 1.0730518821859032 | 1.6314978163845801 | 1.4086488842803147 | 3.351375986579597 | Constant | 1.0407266464031029 | 1.4050844365999071 | 1.7967372318622863 | 1.5020873583480427 | Constant | 1.6171209528825097 | 1.1858606428252094 | 1.310969741735804 | 1.0175422945929806 | 1.651535371353661 | 1.6023879348714214 | 1.794833921648571 | 1.5164469439310253 | 4.732734332425889 | 1.4971599334241896 | 1.2736492221682627 | 1.580576635900206 | 1.740348608499789 | 1.6601715165397415 | 1.4599880992794436 | 1.1522896833525964 | 1.1913772962390567 | 1.2286618167965453 | 1.120451114000082 | 1.3918040313933622 | 1.27641893267823 | 3.2347914771595723 | 1.0758638481861602 | 1.0836841078569877 | 1.3615746955899644 | Constant |
| 2.5417045873754938 | 2.6756379601289915 | 1.9747789598223786 | 1.6405650951798485 | 2.6360754869501473 | 1.8574677083112692 | 2.59947793616659 | Constant | 1.160439595158143 | 1.8705096007426865 | 1.8277274641696788 | 1.4394503714402431 | Constant | 1.9701077642464244 | 1.822884024447543 | 1.911567145462156 | 1.1144191263714143 | 1.6168513104361937 | 2.1486082781118157 | 1.5068387346752126 | 1.5727997294577214 | 2.394159402879818 | 1.842916373853192 | 1.581250863935784 | 1.2058509136838356 | 1.8173530154599873 | 1.1208542335812515 | 1.996223620804053 | 1.1510785106875037 | 1.4269842689355419 | 1.3158263068362426 | 1.9558428505692658 | 2.0920097770561226 | 1.162067426343978 | 3.394268143524632 | 1.1077517270086832 | 1.0979890700666286 | 1.259793793161874 | Constant |
| 3.009793229284027 | 3.3992297680512316 | 1.742718415942444 | 1.13333467427561 | 1.6394946060256277 | 1.3588310909723846 | 2.9681397855131224 | 1.032865826496775 | 1.0719456018053213 | 1.5804778954547796 | 1.4439727752175202 | 1.266690146955243 | Constant | 1.4992010772164592 | 1.1291375903705678 | 1.3039438387121807 | 1.0163296460690108 | 1.3463730809559 | 1.5168875914012656 | 1.1855938991751271 | 1.4779656150401312 | 2.091537254872862 | 1.3350743863637153 | 1.0788575559415454 | 1.1188122738993167 | 1.6880843217456178 | 1.0269862779693386 | 1.684941019836206 | 1.1156041027010828 | 1.2754178655208315 | 1.385331863620142 | 1.3823716609932954 | 1.154568514923278 | 1.026165907735257 | 2.8689556215687357 | 1.0328573661521578 | 1.0332563119703038 | Constant | 1.4473894908953906 |
| 2.719086163069794 | 3.4353985585940063 | 2.269186558257791 | Constant | 2.862460015680829 | 2.047043793347077 | 3.0963994266978627 | Constant | 1.2181417839674797 | 1.6912387759058076 | 1.9722457381765686 | 1.8543227234934385 | Constant | 2.352770567341421 | 1.7221407207815393 | 1.6802821535181844 | 1.0198713569065316 | 1.6113893324731778 | 1.5942566865526828 | 2.503116202345284 | 1.8511665873258494 | 2.2860217588895337 | 1.6540158997329732 | 1.3674477496397277 | 2.0986132641377946 | 1.7541554402180262 | 1.1302591253050316 | 1.646370363003129 | 1.3009206868784131 | 1.3064177758073094 | 1.3471901855495305 | 4.068808856077915 | 4.131427969113166 | 1.2184845904897668 | 3.868878808518509 | 1.3248041213154178 | 1.1781682336837345 | 2.089589112265979 | Constant |
| 2.13802303012803 | 2.4080523303434864 | 1.653132493571726 | 1.057163580997708 | 1.965577609309044 | 1.668537263221582 | 2.0994026374685566 | Constant | 1.2102383468578757 | 1.3648506627614796 | 1.2264749052304156 | 1.233552891544809 | Constant | 1.6848794554627142 | 1.3188886633706223 | 1.5745707339784345 | Constant | 1.537777586293783 | 1.3552339968270344 | 1.1699584337489195 | 1.7693077900021423 | 2.294372516249647 | 1.1781159016231073 | 1.1048639450362292 | 1.3176980079356015 | 2.0780781318606762 | 1.0856851545704047 | 1.4375584037502178 | 1.219024443688976 | 1.394257657027171 | 2.7288554449493607 | 2.0923265754977014 | 1.6951274022776692 | 1.4414706974764082 | 3.036970692348431 | 2.559419572141025 | 3.0437115321356307 | Constant | 2.9423612311311556 |
| 1.833215864979724 | 1.8752844956570909 | 1.2629860420741303 | 1.0162993376054412 | 1.635697927854541 | 1.5123548616393214 | 1.672353144088622 | 1.0986890136030423 | 1.353163781657092 | 1.198100056314489 | 1.205641676514481 | 1.3153164807604736 | 1.1064703353332248 | 1.6390167213165163 | 1.1003311638392168 | 1.4824579632730694 | 1.0113716608978904 | 1.2773778402761284 | 1.197612588384837 | 1.2082693431942855 | 1.7910467983889924 | 1.76227082724002 | 1.2137160781056653 | 1.1691693599929585 | 1.2144420617514693 | 1.6069545081025227 | 1.0503597949544736 | 1.2055266902124426 | 1.2451304154667298 | 1.3640617156857593 | 1.5122566783535885 | 1.1225765879657543 | 1.1624525599911455 | 1.0755655512992797 | 2.331606508967828 | 1.0761487608726763 | 1.136930213626459 | Constant | 1.407018069859243 |
| 1.7517588878446737 | 1.5570377829887854 | 1.9365850619976224 | 1.0637264983555885 | 2.5344687215772 | 2.1470780987570013 | 2.043428179331039 | 1.1738255560254693 | 1.4637027878662134 | 1.6628220528320086 | 1.4433313634584912 | 1.440797400566606 | 1.61983069550234 | 1.9181647662510823 | 1.4655748792044825 | 1.9857896911166573 | Constant | 1.632397817673371 | 1.6118831358204653 | 1.7345664734397457 | 2.01713339665246 | 1.8866547153859834 | 1.8408655443745 | 1.3049498662085008 | 1.693531622833077 | 1.9031409883434405 | Constant | 1.6234727301232201 | 1.2094863821483215 | 1.477976000327067 | 1.617016209481409 | 1.5760171124177202 | 1.805127809407423 | 1.2225888409948906 | 1.3228286075449298 | 1.1129584502978591 | 1.3373751658583761 | Constant | 1.43653988817954 |
| 2.5814019866647637 | 2.0282974654607973 | 2.420694438798881 | 1.0147569088153194 | 2.406620134872428 | 1.4025419397008523 | 3.4441408487976375 | 1.0095932008601103 | 1.7887566741163576 | 1.3015507877139159 | 1.1812289312208795 | 1.5931222143144854 | 1.026793692053607 | 1.7093202908322376 | 1.0425751287816647 | 1.582827705686176 | 1.0100607025693527 | 1.3750074775359233 | 1.4007144064271448 | 1.4366256362888323 | 1.6924720121869705 | 3.783453669081939 | 1.1422337935361973 | 1.0794838305801375 | 1.1685441195418722 | 1.4914016865071689 | 1.032011996765166 | 1.7434260775564074 | 1.080316470932161 | 1.2286049930483411 | 1.5206832987110361 | 1.4650446325598225 | 1.580326741854488 | 1.0753280242737415 | 2.3665484519081885 | 1.0733670321295274 | 1.257257879995633 | Constant | 1.8725601689713436 |
| 2.342760631810708 | 2.468804808738187 | 1.6217262489874635 | 1.098763934474311 | 1.591250068679821 | 1.6517611377372683 | 2.934987677647984 | Constant | 1.236913162931589 | 1.6258261508005853 | 1.5780013826887163 | 1.7600929556944764 | Constant | 2.4254172437675634 | 1.4330594584078564 | 1.4133252214393688 | 1.0209280490952297 | 1.6306255936112763 | 1.4543587229204191 | 1.3497456716326661 | 2.25751211613747 | 1.9148477906041153 | 1.3270197833461388 | 1.230023928503414 | 1.3385797950463758 | 1.934426480598941 | Constant | 1.5792799671050886 | 1.2480490813131664 | 1.2591987570225929 | 1.3836355392904793 | 2.386361452902616 | 1.7408073656951744 | 1.1756348954786184 | 2.517632904172908 | Constant | 2.7613525671430152 | 1.4412245703724942 | Constant |
| 3.667116633765829 | 3.1694453198360284 | 3.4702046237013784 | Constant | 3.1666348515162497 | 3.713577417157972 | 3.9839766436925146 | Constant | 1.611324230145932 | 3.1291274209193327 | 1.640690042218697 | 3.8600910976040304 | Constant | Constant | Constant | 2.16001952563827 | Constant | 1.8817443077573803 | 2.6248526579958744 | Constant | 2.5279418899226935 | 3.4017826900200414 | 2.2102336549955455 | 1.9964593191083502 | 2.979414592865169 | 1.5691791995676843 | 1.6987827899287449 | 2.0319069996828585 | 2.346747772597648 | Constant | Constant | 2.956139611447833 | 1.5300991384766371 | 1.6030287004869745 | 3.9209524422272706 | Constant | Constant | 1.2506791755049007 | 2.979714492595595 |
| 3.011469942502774 | 3.2791985120389677 | 2.010918965601874 | 1.0613654688312733 | 1.7661929662844356 | 1.7009088532808718 | 2.89185276286977 | Constant | 1.2759935471020332 | 1.6080363263488207 | 1.913577053712033 | 1.197885658172786 | Constant | 2.659784819206754 | 1.6049121201791121 | 1.7759812727796236 | 1.0319422865001617 | 1.4718748153074657 | 1.3783337015749209 | 1.5106619347538375 | 2.6890696592424987 | 2.6488424771976034 | 1.6029583319063931 | 1.2191565539731029 | 1.5603502003823753 | 1.8879736418888833 | 1.44027638479987 | 1.6980217586709316 | 1.7632159209279035 | 1.2064642443547675 | 1.2557173660806413 | 2.750329252367958 | 1.2288742690431624 | 1.1886451152947213 | 3.1732057027831027 | 1.0076868116604834 | 1.0723961770672454 | Constant | 2.1905932051726134 |
| 2.8073669717230847 | 3.252056112783346 | 2.305409592820768 | Constant | 3.195047057756087 | 2.319333437531431 | 3.282461556611952 | Constant | 1.4528787208343668 | 1.6842424140711798 | 2.184647506282065 | 1.6478735478677091 | Constant | 2.00790318809826 | 1.6759680156020647 | 2.370341699024242 | Constant | 1.6949582815818611 | 1.6010039202801076 | 2.2375218097422804 | 2.2915954749677585 | 3.671827076233484 | 1.539031798006346 | 1.1178621279834704 | 1.7286145165643583 | 1.8324450405263044 | 1.4288810422932714 | 2.002200089242566 | 1.4968372510071482 | 1.2866917108102423 | 2.975544339787471 | 2.7463691729374418 | 3.0108759749718503 | 1.2165087788443285 | 3.131938506198443 | 1.1660991138600751 | Constant | 1.3063104939242833 | 2.934345133069164 |
| 2.4349531752599307 | 2.663370150773845 | 3.5710733014020968 | 1.0642284957636456 | 2.963446909503932 | 2.212802171037218 | Constant | 1.037272312902172 | 1.6582028190388542 | 1.4129279678615057 | 3.0457895749135724 | 1.7598207529588201 | Constant | 2.0539603018022574 | 1.189563266011751 | 2.7548287391358146 | 1.0159834079738224 | 1.7100617420570265 | 2.2680929771882545 | 1.6114524029953246 | 2.0836657949343853 | 3.439490592683434 | 1.4300789661214295 | 1.3218488285395014 | 1.6540473644299953 | 1.4648762482430886 | 1.1884337293479197 | 2.077846983610646 | 1.2616697025037376 | 1.0845709334511329 | 1.1342589487204806 | 1.8217828891096872 | 1.5519890113141916 | 1.1113417930565987 | 2.358118286517313 | 1.0250558887356682 | 1.0240371762900444 | 1.2313030488412489 | Constant |
| 3.118735241306128 | 3.1128282318357163 | 2.225536275039304 | 1.3895212213192896 | 1.8115046394798728 | 1.5887324591654899 | 2.6996602464736354 | Constant | 1.0537708123629332 | 1.3393057027018713 | 1.3830085072569007 | 1.385757675369037 | Constant | 1.685403794418351 | 1.3448979840571764 | 1.627597016611257 | 1.0110853047584119 | 1.3363589626202887 | 1.495930401864577 | 1.7080309138058531 | 1.4575568741005367 | 2.8740157897681686 | 1.295143889213699 | 1.164359998168138 | 1.3130859062065425 | 1.6118444812167496 | 1.055292648381948 | 1.8643780370658394 | 1.1245449528927043 | 1.2066086086447785 | 1.776845256036927 | 1.6051096668918587 | 1.2256092571120314 | 1.034070424076482 | 3.136601120723133 | Constant | Constant | 1.0651324659005912 | 1.773073765791025 |
| 2.814036419979862 | 2.9076796936888814 | 1.6794116141402549 | 1.1869876806807134 | 1.4631184764142808 | 1.5357120488355414 | 2.6561287341797892 | 1.0637003266118554 | 1.1037637023988163 | 1.0817006019134854 | 1.2509839720943685 | 1.3747325725154982 | Constant | 2.1222562974237373 | 1.3378968751947435 | 1.470556826566331 | 1.0214363276428742 | 1.313691461060765 | 1.6798081433755452 | 1.6279893905696665 | 1.7298184520349724 | 1.8049675424923628 | 1.1662601766939247 | 1.2895776190487245 | 1.804015816439697 | 1.6930112105826665 | 1.1162174840242831 | 1.6088904303306562 | 1.2723971462901338 | 1.1935275622190253 | 2.5989875140323315 | 2.911074063821442 | 1.6556378710835642 | 1.1839137776163848 | 2.7702192580244582 | 1.2392951932923142 | Constant | 4.033142570500428 | 2.012116564568644 |
| 3.321753252934345 | 3.0499109916627463 | 2.3463423398501413 | 1.0323003104679946 | 1.517556012664712 | 1.4356238988411403 | 3.62159850661138 | Constant | 2.4639266873229855 | 1.119332256120458 | 1.093270313669566 | 1.4328602936992472 | 1.1995885244930011 | 1.8593978290053834 | 1.1250418130819508 | 2.1103226434770876 | 1.0677266568211734 | 1.4373623579294443 | 1.9644990293477458 | 1.2697811652025892 | 1.6945205539653734 | 4.653471469049996 | 1.9945341728809134 | 1.2730729351464285 | 1.4812383767307282 | 1.5559934055722984 | 1.2387104507133826 | 1.4900116516064612 | 1.2362298516820256 | 1.2359732023810748 | 1.3241191456463348 | 3.0880706081373694 | 3.7674159640101657 | 1.3928098864935428 | 3.6668778571615417 | 1.0742340551462277 | 1.1480286891646128 | 1.8467484138434662 | Constant |
| 2.458124558876376 | 2.061867044228744 | 1.579442603518254 | Constant | 2.133554731779256 | 2.0300427653869693 | 2.9415484841723885 | Constant | 1.5112790252275297 | 1.6735753094331665 | 1.7938531161916553 | 1.2633547785806378 | 1.0522765003647205 | 1.9441793216388978 | 1.3206858015113072 | 1.5626357982115955 | 1.0154387628081991 | 1.5774596305304307 | 1.7629249333680237 | 1.250823714664405 | 2.278022707469534 | 3.046823462104694 | 1.5728275730324348 | 1.4036989867036653 | 1.4638434739518529 | 2.0413573358473425 | 1.1208915484930988 | 1.6940874279536045 | 1.238777754012918 | 1.1034730112010875 | 1.313647295641508 | 1.4345607208897966 | 1.2470237562025783 | 1.1423195565011093 | 2.660010266983772 | 1.0321631316218056 | 1.325146327147107 | 1.0202680198638925 | Constant |
| 3.6332053612444453 | 2.588370348639771 | 2.9817999049925246 | Constant | 4.2509133798142935 | 1.3491009740117856 | 3.549254458138823 | Constant | 3.176906467324005 | 1.138093569487082 | 1.4274438967595708 | 3.295067941577581 | Constant | 1.993081974123689 | 3.939811493343114 | 1.4970789790353403 | 1.0664339553850757 | 1.725406740352332 | 3.114274205658396 | 1.264630514843795 | 3.0806036622098656 | 3.5235768025663097 | 1.7162052234241274 | 1.9037383933515994 | 1.1833796753849042 | 2.3944195672888013 | Constant | 1.9746287430257596 | 1.1467113786929206 | 1.4089602367382863 | 1.3514211036527948 | 1.4256218174113404 | 1.2822020828942289 | 1.2986367234354441 | 4.17113493684354 | Constant | 1.202475097996122 | 1.264657769446557 | Constant |
| 3.0777999490537 | 3.2847025239631003 | 2.2963124199385523 | 1.0473890565262398 | 1.8783828774106377 | 1.494706247231934 | 2.915576245757668 | 1.0114909309742863 | 1.6227897690925464 | 1.4922635918957534 | 1.2065004067413456 | 1.5267895129866027 | Constant | 1.717197781109196 | 1.1016790036185287 | 1.649082724837515 | 1.0140456259402078 | 1.5033368509897858 | 1.5624267751928707 | 1.4285655642334605 | 1.5682284035088105 | 2.2110379704846994 | 1.4064310854064652 | 1.181873296839258 | 1.4743940236645634 | 1.8146199614871144 | 1.0409421139422559 | 1.86707915392925 | 1.0839409223859295 | 1.2635238981588128 | 1.767273660247136 | 1.25214500753725 | 1.2555758464535922 | 1.1052607119194635 | 3.2524679223230555 | 1.107783038876081 | 1.4809872449037635 | Constant | 2.0363497282015675 |
| 3.2980546363182643 | 3.5373230020000563 | 2.1571318937175827 | 1.354169055357615 | Constant | 2.0969298472966447 | 4.085271957323737 | 4.5198253381752 | 1.545671367450713 | 1.7828781010944135 | 2.3376463565489343 | 2.130883498822102 | Constant | 2.9727069549389107 | 1.2088603529571467 | 2.177425358066232 | 1.051358864469523 | 1.7008795082831336 | 2.375976941844404 | 2.8997486142593227 | 2.5921351923317237 | 4.6285560739097695 | 2.2021285253041176 | 1.7183724493339805 | 1.8657238681446955 | 2.039073945416649 | 1.6214380160753803 | 1.6766649871036614 | 1.4539084419841173 | 1.3007377027918283 | 1.9941990806940035 | 1.434547667555411 | 1.7613412204692462 | 1.5651473145313597 | 3.2549160897876703 | 1.2440117618490425 | Constant | 1.1481814538921957 | 2.4791129899534647 |
| 3.1420610198167434 | 3.919592375559323 | 1.5470797455901935 | 1.0861314094852852 | 2.32448477311281 | 2.8398848651676953 | 2.6398858253938426 | Constant | 1.0333785386089354 | 2.1055207726191685 | 1.9835272375427278 | 2.4576806063518286 | Constant | 4.5283455855194505 | 1.7598517687859492 | 1.5036495695454501 | 1.0514543672450862 | 2.943781897005055 | 1.9879660131664028 | 1.6928155148006532 | 4.945161432654454 | 2.3229926408636596 | 3.028234022460579 | 1.9162342637539123 | 2.576161267301796 | 3.9342246738561655 | Constant | 2.221955645798477 | 2.7109289873932667 | 2.948580540338908 | 3.6651592508554747 | 2.2498642682827477 | 1.994282773771192 | 1.413314049732638 | 4.372654626646912 | Constant | 1.299492083429529 | Constant | 2.5346757526365606 |
| 2.029349565759172 | 1.8529524589471942 | 1.4636322419596828 | Constant | 2.956774609976211 | 2.5565953554962593 | 2.068222865163069 | 1.0637664241380491 | 1.1263341055104747 | 1.6677521757851346 | 1.4226682356877802 | 1.260178092448198 | 1.1149467581499772 | 1.7017420818981979 | 1.2868178788749551 | 2.5407999404760298 | 1.041415089603107 | 2.23683527374096 | 1.6683281323256398 | 1.584285036580489 | 1.9947328157853168 | 1.9776977877589916 | 1.7630434801472656 | 1.7022689803569067 | 1.2136995836476017 | 2.295883970241245 | 1.2389815321670945 | 1.6335656558776903 | 1.2695015590850165 | 1.43697315025041 | 1.4741170733066529 | 1.3389997543366405 | 1.1275756336549683 | 1.2029653573479262 | 1.41107908356027 | Constant | 1.1796801272965862 | Constant | 1.2177988288491952 |
| 3.2862751094412626 | 4.341075151244989 | 2.709266805861868 | 1.7307083646335601 | 2.7087788085863744 | 1.6726134659201777 | Constant | Constant | 1.1267371802872896 | 1.6209060305619338 | 2.286278664678412 | 1.3525235362238588 | Constant | 1.8735096259461286 | 1.1133879748482607 | 2.3412170964526924 | 1.0146640565911067 | 1.5866574854936952 | 1.5627623647490672 | 1.5171278184034598 | 1.8697182334575073 | 2.2619839600673006 | 1.2306389813259904 | 1.4535782615213249 | 1.3104948207965175 | 1.8392551521683542 | 1.0183467422448487 | 1.7953171862353594 | 1.0814757222936215 | 1.235138743318721 | 2.753402735391122 | 1.2181549957128603 | 1.1302419191616786 | 1.027469214934545 | 3.6557620638668507 | 2.4810617901816046 | 2.5815406186723395 | 1.2928691014661302 | Constant |
| 1.8406159251106928 | 1.7093948517391324 | 1.6665627955707576 | 1.1668183668557222 | 2.0594113999720207 | 1.422935077116493 | 2.277427142536161 | 1.1110543336517906 | 1.5094341027965856 | 1.470332585842092 | 1.4400692001830155 | 1.183066612661505 | Constant | 2.323241030106506 | 1.3605832152636141 | 1.6610895937679147 | 1.0130061965644406 | 1.3787732556566605 | 1.2765703370554098 | 1.3144263125488458 | 2.0356348766851164 | 2.0414953683558794 | 1.256633643251759 | 1.5873150545754393 | 1.8219049790590087 | 1.6857729135215072 | 1.0923642196746597 | 1.691046477806609 | 1.3474103843396128 | 1.5886947663594218 | 1.6662147891181966 | 1.1521215072011242 | 1.3666155877538422 | 1.063745261989488 | 1.1475017959402427 | 1.2612736991920608 | 1.1682542515782564 | Constant | 1.1655347045957176 |
| 2.565963251989932 | 2.168157684304816 | 2.9992629180241064 | Constant | 3.6840941608440283 | 2.1635391756716262 | Constant | Constant | Constant | 1.4081722676465072 | 2.3957258830690558 | Constant | Constant | 3.417105764255195 | Constant | 3.82546881383847 | Constant | 1.6353679130251118 | Constant | 2.199436597687947 | Constant | 2.9676765957407856 | 3.916937088451857 | 3.279395284570849 | Constant | 4.876007166186002 | 3.1426765965071755 | 2.429276966090795 | 4.5786336388087125 | Constant | 3.2772687359935615 | 4.593227415476752 | 2.144406642470304 | 2.182249623393147 | Constant | Constant | 1.706448969479701 | 4.940477254223512 | 4.4912820913486895 |
| 2.9206414961839893 | 2.8131224182172576 | 1.5673217470212062 | 1.0876824462704493 | 1.801239611176122 | 1.49578914656754 | 1.9571310005508935 | Constant | 1.211370290644581 | 1.431527898035365 | 1.2299330055729774 | 1.2393629835183408 | Constant | 1.752185462146185 | 1.629381961583699 | 1.4611019191502503 | 1.051873189591391 | 1.4754076601133175 | 1.4487400298813922 | 1.2102559549695704 | 1.900649946366922 | 1.7722359367728289 | 1.2019046072934993 | 1.3439977863519705 | 1.2137280719708583 | 1.9769628867363465 | 1.080608270532033 | 1.5060793592185988 | 1.114010414589626 | 1.1879816017305165 | 1.950547456073049 | 1.214092052432486 | 1.2253855118149863 | 1.1454881845229168 | 3.090298218829626 | Constant | 2.231954255981181 | 1.2489739406855176 | 3.1528095700301546 |
| 2.673064000239167 | 3.1275335928563606 | 2.3813715838362497 | Constant | 1.5628509273937354 | 1.8205729652228329 | 2.9620892631751325 | Constant | 1.403668154828816 | 1.818374665351836 | 1.6070898069967303 | 1.2343479413990912 | 1.0529604367221082 | 2.174498840187137 | 1.4189929269190205 | 2.065470710258315 | Constant | 1.649792456043151 | 1.415813537234959 | 2.1610008777848404 | 2.63456652453872 | 2.276784892720402 | 1.7973065918191937 | 1.8668949274465998 | 2.029253484705469 | 1.7369418833545167 | 1.4470499718748513 | 1.904085139719169 | 1.3156658613059558 | 1.2024726017432128 | 2.6341837826075465 | 1.5989683807643642 | 1.544326845155605 | 1.25179071112295 | 3.556682051957033 | 1.4599036766741993 | Constant | 1.59340107558206 | 2.953223945270775 |
| 3.3158134814122895 | 3.639824013708791 | 2.2934974500425054 | Constant | 3.369960925053089 | Constant | 3.163046246571589 | Constant | 2.2761662622771066 | 2.186990953569461 | 2.3273035057572136 | 2.0832372217167805 | Constant | 3.3524047200108824 | 3.8333097585283453 | 2.2101402128713 | 1.0217707065595731 | 2.1245338738571284 | 2.4984657841003006 | 3.6113276763414226 | 3.0615480358848286 | 3.088334310920911 | 2.1023731032158124 | 2.741977840582915 | 1.7489173905639253 | 2.0027987029197436 | 2.12311926256679 | 2.3165210112767194 | 1.0977808051100832 | 2.0029500224493004 | 2.1104116038823144 | 1.4760374199396502 | 1.9313132908496902 | 1.349033745365247 | 3.4775434532461924 | Constant | Constant | Constant | 1.5094783563706697 |
| 3.406779822366941 | 1.768474618809078 | 2.0988638671412674 | 1.1008243390666663 | 2.0102466944170336 | 1.6318338085897084 | 3.9121172073162493 | 1.8829431880920349 | 2.6461558911413037 | 1.4135923175297034 | 1.3457943620970005 | 1.6131527780821764 | Constant | 2.1522193679795145 | 1.3700227515948984 | 2.633043783939464 | 1.0396506974749187 | 3.796861088254862 | 1.6288942591829494 | 1.1186536169111705 | 3.4223062896350274 | 2.791316730817533 | 1.5675459847261015 | 1.2952946888113779 | 1.2301442491638899 | 4.137766147551841 | 1.1423304871581748 | 1.7565477837890695 | 1.548446682269692 | Constant | 1.2134222131109742 | 1.2929856855554351 | 1.1512169240582029 | 1.9468079472235107 | 2.9752719423664136 | 1.2390890373977959 | 1.1855696296092129 | 1.4252938005684934 | Constant |
| 3.4056069727061438 | 3.211007906785203 | 3.052096716617158 | Constant | Constant | 3.3516834767216968 | 4.47849842581436 | Constant | Constant | 2.4892322076376865 | Constant | Constant | Constant | Constant | 1.891209285818696 | 2.038944617882241 | Constant | 2.884731018071349 | 3.2059787533205895 | 3.6010529208320308 | 2.9573761564874705 | 3.410334863474279 | Constant | Constant | Constant | 2.1669403104060168 | 2.165528321992272 | Constant | Constant | 1.5437921203356788 | 1.8715870728530264 | 2.0395569847671613 | 2.790812971675135 | 2.04192803728482 | 4.798375412569188 | Constant | Constant | Constant | 2.2195779527088795 |
| 2.205339528547771 | 2.261384108529976 | 2.1288850467747205 | Constant | 2.580210901092183 | 2.169029405956066 | 2.9363760006789823 | Constant | 1.2666581729203055 | 1.9116661716175671 | 2.2828883165900353 | 1.4764004408498261 | 1.0982078139584721 | 1.9261673010757692 | 2.0654926770883786 | 2.338421312484079 | 1.041042692658034 | 2.1334270201594987 | 2.0464033414612572 | 1.6700564236259847 | 1.7838373234089335 | 3.9800996724268156 | 1.5751330449101304 | 1.3712802733269718 | 1.22125669135513 | 3.852160686067967 | 1.2006600500842668 | 1.904476570979198 | 1.2163552309356116 | Constant | Constant | 1.430491004106739 | 1.4820224629917498 | 1.3266234413607605 | 2.9534525432640772 | Constant | Constant | 1.5473903893821717 | 3.1820122846808623 |
| 2.304167457107657 | 2.343637732906412 | 1.8972404442598982 | 1.253712115205647 | 1.6246761840516586 | 1.6422170668215945 | 2.570368931336106 | Constant | 1.2158920821608916 | 1.253739709611937 | 1.2847780963214603 | 1.2789130432717366 | Constant | 1.948828354081836 | 1.2662224709851815 | 1.5699604837152357 | Constant | 1.4058090746477299 | 1.4623169910151088 | 1.407409711962722 | 2.2325070978345027 | 2.0884357285840762 | 1.2666152896906597 | 1.106726825550696 | 1.3263837545780246 | 1.6944207106947249 | 1.3798361789745577 | 1.6624881963737645 | 1.2989376464673037 | 1.1988654221681334 | 1.272204807199359 | 1.4579182783409 | 1.525290788358323 | 1.2729113962465377 | 2.695699188896029 | 1.1878709908382867 | 1.0831227018318397 | 1.3446812019517511 | Constant |
| 1.859668820728189 | 2.030224852414776 | 1.2558090482471402 | Constant | 1.9080671275266774 | 1.4261273134388195 | 1.8675719700633475 | 1.2037982060594876 | 1.2886101010888078 | 1.1334436655665612 | 1.7269213252063296 | 1.0674677765919747 | 1.0155528413042183 | 2.147390778111296 | 1.1492129590237765 | 1.7396764476372315 | 1.051542830937202 | 1.5710126760018908 | 1.4724029164616472 | 1.1322022624581634 | 1.7979253118034855 | 1.8397663089074459 | 1.282605905764861 | 1.0905624854266012 | 1.1012549650995447 | 1.692930882832535 | 1.1573696922283663 | 1.4180892195624835 | 1.296355530321708 | 1.0442097613998367 | Constant | 1.3528804635219323 | 1.2823445646277527 | 1.347472305266414 | 2.403683231566079 | 2.063131809242446 | 2.2243978168898892 | 1.1554212187864659 | Constant |
| 2.4725774981725412 | 2.571468668850023 | 1.3636650381626614 | Constant | 2.3377617421156662 | 1.695784117452568 | 2.7376841012804634 | Constant | 1.1399271680088168 | 1.5966219419006045 | 1.4146319853593647 | 1.2657292858366387 | 1.0305195490150763 | 1.8402974508340473 | 1.4364798666859855 | 1.5682358630594189 | 1.017970673406994 | 1.686367429411715 | 1.9358401021501799 | 1.6337503930398065 | 1.7403723462225662 | 2.6527736583726096 | 1.4964191337911634 | 1.2638316156728298 | 1.222967549636858 | 2.071197962916017 | 1.222925711929576 | 1.646259171590528 | 1.1795328732010533 | 1.6263686127035606 | 1.943933638723908 | 1.2014885503941688 | 1.2085915745880615 | 1.2748733960183245 | 3.0028292780076145 | 1.0475539222960664 | 1.0476626932787194 | Constant | 2.0074454011852674 |
| 2.688571009605646 | 2.67864482441068 | 1.7833544469415787 | Constant | 3.1406903773810835 | 1.552495750438089 | 2.759475258672295 | Constant | 1.2061233074751139 | 1.993211472087885 | 2.8112811509884112 | 1.6024560048730094 | Constant | 2.7783559408517204 | 1.4336713721080372 | 1.4614404452167495 | Constant | 2.309367729325203 | 3.0801275604700513 | 1.8487563092892003 | 2.568003291254653 | 3.296170515420208 | 2.3831133671569518 | 1.4223101376600398 | 1.6918727037450094 | 3.1454856583350765 | Constant | 1.607758169492926 | 1.4957038252463135 | 1.2125318926328181 | 1.3144257413645903 | 1.5127264483902128 | 2.7401475259190815 | 1.2038029007586886 | 3.129229573737611 | Constant | 2.3881988253500417 | 1.1153637804373155 | Constant |
| 3.2347868723180286 | 2.751781201385957 | 1.5056631642388847 | 1.1739550830640795 | 1.5266616111500493 | 1.3959708142280207 | 1.7043144179614627 | Constant | 1.2792558626407318 | 1.2800960731650188 | 1.084447126148626 | 1.3091886250274731 | Constant | 1.7534319443372581 | 1.6774179015229718 | 1.5568624474068533 | 1.0202758972416444 | 1.504466383367737 | 1.6044582038560438 | 1.3434935779802735 | 1.7143952692517166 | 1.437462431748281 | 1.2610437515004138 | 1.174047367840222 | 1.339432827390443 | 2.0104866270865567 | 1.0855978783711953 | 1.2783781450840688 | 1.2231667405223077 | 1.287154415742046 | 1.999874706227784 | 1.8752839486614776 | 2.6018665966969152 | 1.1506244020959708 | 3.474861014478853 | 2.904789561533612 | 1.764473977625322 | Constant | 3.740245667546478 |
| 2.8398882063655764 | 2.583257911416134 | 1.7215616975257801 | 1.0547129626476157 | 1.8150893904326906 | 2.045167268661186 | 2.0465731793485205 | 1.0898427986131836 | 1.4892367296358289 | 1.2247638932161842 | 2.1444917169072175 | 1.6445098253438462 | Constant | 1.8334351651483707 | 1.3249575112729761 | 1.7610177373984168 | Constant | 1.6521253479228835 | 1.7083039983537356 | 1.582570849258775 | 1.9656889795337222 | 2.0297508822239974 | 1.5013715129132739 | 1.1882869268784857 | 1.6940840837144522 | 2.0461751031932387 | 1.3246405849464076 | 1.4556848482418345 | 1.335434285111493 | 1.1890974178076383 | 2.5875836506853425 | 1.1737247090596288 | 1.5119635663552569 | 1.279486825092091 | 3.3790874141382075 | Constant | Constant | 1.3074956872481187 | Constant |
| 3.084681659805158 | 3.319493635153351 | 2.296250116894727 | 1.0978089360513346 | 1.9627550759520003 | 1.9636423384618047 | 3.36321381320774 | Constant | 1.105066616611247 | 1.5707069112707737 | 1.8598710723040426 | 1.3832471051338848 | Constant | 2.063453926771802 | 2.1536070112052124 | 1.876766996413135 | 1.0298583366092284 | 1.5653857239916078 | 1.5345631307121042 | 1.2417186738553363 | 1.8467806284766097 | 2.82927229619546 | 1.5370602554638826 | 1.349959204226365 | 1.6943946293869312 | 2.0597089387825047 | 1.266673713011651 | 1.7483829704404754 | 1.2390351886416435 | 1.234383493956267 | 1.2798158022553054 | 1.4962672562012267 | 2.3137767599367596 | 1.1908996185173966 | 4.3857307341669145 | 1.482761107972969 | 1.609858739285804 | 1.232465310002189 | Constant |
| 2.7730189459495462 | 2.38140335704841 | 2.58256288884344 | Constant | 1.9616420456534334 | Constant | 2.4027107560043848 | Constant | 1.420127308149472 | 4.624689524039667 | 2.062523265236467 | 2.2897191903660605 | Constant | 3.256877343846169 | Constant | 2.1401631256321942 | 1.087689120647647 | 1.8153110685562628 | 1.8345661292167523 | 3.7899724673876904 | 2.8619806023588947 | 2.4946947125599825 | 1.8771243738212324 | 1.341640276710558 | 1.5064892386404212 | 2.823971688149825 | 1.2519860491711343 | 2.1918962453623525 | 1.1487471938664942 | 1.2389981875070741 | 1.3857518985853468 | 1.9144643105094978 | 1.7653006136559553 | 1.2046725503693427 | 3.0635180346155595 | 1.5770749521027148 | 1.6012317344515103 | 1.750720720937679 | 2.500813965760153 |
| 2.9391880504146974 | 3.0606987456093515 | 1.839269614086425 | 1.2097969734545462 | 1.7480266084504223 | 1.482721585895904 | 2.7220879319908264 | Constant | 1.1040618740001644 | 1.30513711409865 | 1.5738475761386321 | 1.287466586352249 | 1.027553604813452 | 1.7975756598343955 | 1.434451136901639 | 1.2841724837549653 | 1.030128928897583 | 1.414017037753452 | 1.471124085504493 | 1.386364354701117 | 1.7507392467124683 | 2.0278745553795408 | 1.3629294959402731 | 1.1962177390646966 | 1.3660089396817794 | 1.6157065623214273 | 1.020822057933703 | 1.5329302864824648 | 1.1501653962654153 | 1.244806006654088 | 1.2577706982563832 | 1.3986102814061776 | 1.127205388966903 | 1.05303345607878 | 2.9313821933808626 | 1.022505410394028 | 1.0734571324159767 | 1.557160393133077 | Constant |
| 4.061271496183555 | 3.403682480128991 | 1.8566563183029403 | Constant | 4.431716605502852 | 3.896361095184399 | Constant | 1.6965902435915277 | 2.8122944330091597 | 4.206622200669599 | 2.523909468317666 | Constant | Constant | 2.5429723417608345 | 2.211304858996022 | 2.7245997899313856 | Constant | 1.549819492979998 | 4.532794689455645 | 1.6593485807362398 | 2.192969348023498 | 2.5289763880244163 | 1.290367478416224 | 1.5131299814875079 | 2.529606030726987 | 1.9038757655084133 | 1.8639522097134174 | 2.4185286348738635 | 1.9657928063779468 | 1.4573774676369096 | 1.454463615370024 | 1.6343342706079367 | 1.418676544310087 | 2.375388795154515 | 4.304376147897532 | Constant | 1.4081218842703709 | 1.1783189918695287 | Constant |
| 3.015911884334845 | 2.756808424376078 | Constant | Constant | 4.803412098027816 | Constant | 2.0907388120442936 | Constant | Constant | 2.3857783683278644 | 1.5382111099915963 | 4.61373501369857 | Constant | 3.1323351454948165 | Constant | 3.2180933445850766 | Constant | 1.7484104795348052 | 2.9382988933040286 | 1.954031442934352 | Constant | 2.245574971164845 | 3.152712258221489 | 4.025604513082525 | Constant | 2.3559827864537373 | 1.9372924212928704 | 2.110100081741356 | 4.1189200769880765 | 1.36853592358224 | 2.598098806320175 | 3.3579876358165888 | 3.2492029390179518 | 2.810563569041398 | 3.289893227301928 | Constant | Constant | 1.322304577701588 | Constant |
| 2.7594546908880715 | 2.6566012664038166 | 2.5420528156886237 | Constant | 2.9596120797907526 | 2.8553348693950316 | 3.518400399729463 | Constant | Constant | 1.4715470152977441 | 2.650698393958859 | 1.9794446009648397 | Constant | 2.154901264941808 | 1.7947180582281035 | 1.8018307571746242 | Constant | 1.8647281314052142 | 2.0580056681613397 | 2.0540188411297438 | 2.5168791860009847 | 2.4519889951792693 | 2.107996572411337 | 1.969075335690558 | 2.491712961717505 | 2.0474977118968547 | Constant | 2.078196001621827 | 1.3423436289720263 | 1.4359515319684089 | 4.152377199494679 | 1.9883955248832972 | 1.7858111050527559 | 1.710350800770336 | Constant | Constant | Constant | 2.046609107000725 | 3.1245024545431423 |
| 3.2157145737857147 | 3.9479289473032546 | 1.7325887109946099 | 1.049339347777681 | 2.4651668466941956 | 1.2613101720911042 | 2.7965456101552904 | Constant | 1.8098167622613917 | 1.5115039890581605 | 1.0968236074061621 | 1.6190292520857097 | Constant | 1.653011769767396 | 1.9953963537277326 | 2.1014506255928325 | 1.0410354575720466 | 1.5227595925158472 | 1.6370284783547837 | 1.6014801002424064 | 1.7276348525785763 | 2.0212366706026086 | 1.7067487564396986 | 1.3573674715538802 | 1.3297358040193015 | 1.6432607537429613 | 1.4056529741249113 | 1.4389764020177438 | 1.1327133069286837 | 1.162367460137301 | 1.3687200475583028 | 1.4761458831647114 | 1.4194965259140644 | 1.095126579158238 | 3.133653532532099 | Constant | 1.2915334318870215 | 1.1075987842984094 | 1.5245002614788172 |
| 2.9645534240018225 | 1.9398488982189361 | 1.8421999491786176 | Constant | 4.141159800387661 | 3.248834365691702 | 3.5604175189873746 | Constant | 1.3463266745278888 | 1.9709593880205434 | 2.049556841386989 | 2.691483748920492 | Constant | 3.3280464378245034 | 3.147266217294901 | 4.256129035705584 | Constant | 1.9384251081230144 | 2.473158484700526 | 1.8824505077867975 | 3.6950774156868706 | 2.817662804941353 | 2.2679479353386975 | 1.5489753866106986 | 1.3482744957358308 | 2.0605103946914998 | Constant | 1.7042663226175505 | 1.3659602600573333 | 1.4740814472879344 | 1.552096924656192 | 2.4798454091066495 | 2.915138163582913 | 1.3183263895365842 | 3.1093062339654427 | 1.258671803020502 | 1.3867667430891597 | 1.1888776754847263 | Constant |
| 2.725094382495392 | Constant | 1.9943081236615328 | Constant | 2.8165224181965893 | 1.6465317611126435 | Constant | Constant | 1.4324984362478739 | 1.7571298627188523 | 2.4992778574931416 | 1.9114570145226881 | Constant | 2.006887701168272 | 1.658091198143076 | 1.8642775221976793 | 1.089021595106676 | 1.6614093710602302 | 1.8764096535634585 | 1.9361104520022812 | 1.997424112806312 | 2.667911463346764 | 2.173580072995564 | 3.5668416182149407 | 1.4590742419308314 | 2.5106513512486437 | 1.6952259598397124 | 2.154768157219683 | 1.3842049273980832 | 1.498412555733292 | Constant | 2.1700283620531455 | 2.1175839309650177 | 1.577083150021404 | 2.642961146233691 | Constant | 2.692326304659858 | 2.019251451440305 | 2.855917192505247 |
| 2.9324780005085094 | 2.6243987190362925 | 4.589488880763931 | Constant | 4.16600938943124 | Constant | 4.1885347799956705 | Constant | 2.0020735848862024 | 2.446649891427276 | Constant | Constant | Constant | Constant | 2.228539065808255 | 4.336369856988996 | 1.1868923420138884 | 3.920657084431915 | 3.3001869047919006 | Constant | 3.1592990405534156 | 4.118507132918362 | 1.826291723708908 | 1.6558420198567323 | 1.1632666652728327 | 3.380471761294864 | 2.946937949530812 | 2.2160443384463853 | 1.3343061805515424 | 2.3601300933034732 | 2.542938285100792 | 1.5100420025161048 | 3.297527020741696 | 1.3702371579230355 | Constant | Constant | Constant | 2.2266014807906034 | 2.3617685276940708 |
| 3.0329872338428188 | 3.1918924060102962 | 1.968898456126415 | 1.315590784159639 | 1.9892094423946758 | 1.6112580876311415 | 2.918221450739605 | 1.088001319386867 | 1.268488474396423 | 1.431180718615213 | 2.007047351957655 | 1.366783448786469 | Constant | 1.8537783280186102 | 1.1998779775495778 | 1.4702807484843283 | 1.0217687665754613 | 1.4545146086342882 | 1.5665169498732086 | 1.4987493532411504 | 1.8676759825955216 | 2.9312322988275286 | 1.3971581354151639 | 1.1727286111560786 | 1.285033644477984 | 1.72318988612025 | Constant | 1.5754514643687292 | 1.1093031655357268 | 1.2645853217277467 | 1.368266649709466 | 1.4215240419711639 | 1.1859763199635713 | 1.1073880914548402 | 3.1022059817576917 | 1.082545110508516 | 1.2080904624640953 | Constant | 1.5518116474481285 |
| 3.3610127116654605 | 3.4274443305788886 | 2.6051103229512287 | 1.1541542017561812 | 2.4042610652013425 | 1.901073570830002 | 4.43627437895259 | Constant | 1.8544459876860004 | 1.7508532132856438 | 2.5140576168360322 | 1.4924909982466736 | Constant | 1.8087338976797074 | 2.018992420343043 | 1.7151534569662532 | 1.0170556365852252 | 1.943605416805564 | 1.9531855718246711 | 1.7911802461727047 | 1.8415617088580796 | 3.4787586701607 | 2.072293716892271 | 2.10379276466728 | 1.5498274660446811 | 1.944393896371572 | 1.2171828468799601 | 1.9206622986940907 | 1.2819693801896914 | 1.1852286619713246 | Constant | 1.275123163527903 | 1.2717338443173505 | 1.2666295697229657 | Constant | Constant | 1.3121334431079468 | 1.0973494168441595 | 2.2668958612208416 |
| 2.507183018668297 | 3.4213759838209366 | 2.93737856679461 | Constant | 3.4656817587961513 | 1.799769116699071 | 2.798890354543895 | Constant | 1.9804725205000606 | 1.6632580144773685 | 1.867332253420279 | 2.0423251158716242 | Constant | 4.963605915437635 | Constant | 3.8192866934888503 | Constant | 2.7520868873440736 | 2.359161826846417 | 2.079894651175945 | 3.8258445914361445 | 3.0567842147606994 | 2.488823116131789 | 3.895313460297513 | 2.25382819505416 | 2.5311486801425724 | 1.7433106253890995 | 2.5520489106837294 | 1.7777255224800208 | Constant | Constant | 2.1230092033959584 | 2.1000817165475527 | 2.1324331398651206 | 3.5335665355456123 | Constant | Constant | 2.5450248731498952 | 2.7997102083005885 |
| 1.6606909294921395 | 1.8216814270792248 | 3.220632146423593 | Constant | 2.8291919910518084 | Constant | 1.5747052362313827 | Constant | Constant | 1.4244225585753398 | Constant | Constant | Constant | 4.497492097389036 | 1.607365246940709 | 2.2989690259448663 | Constant | 2.369566704501431 | 2.0393068796427944 | Constant | 1.5197895299623743 | 3.0402623009277767 | 2.143463402666151 | 4.248800537367829 | Constant | 2.9225750799165513 | Constant | 2.2097454535006302 | 1.8336812344307094 | 1.4724249981811908 | 1.79742727915481 | 1.5729774801388945 | 2.1189670942583034 | 1.3939794935614636 | 1.9957491657283912 | Constant | Constant | Constant | 1.8356537053097803 |
| 2.380750905610755 | 2.1115974425697495 | 2.2569815570029217 | 1.1748653277374825 | 3.4287700488849433 | 1.794342782512005 | 3.5715142719515116 | 1.094742286511952 | 1.977279373476525 | 1.3378286504597205 | 1.226691732752774 | 1.6956908322339006 | Constant | 1.8761138652805138 | 1.2041170812308166 | 1.4660647394056265 | 1.010339784282339 | 1.4132955796954174 | 1.577295905908091 | 1.776130782909979 | 2.3715225070902823 | 2.9962242197682825 | 1.3334961181237588 | 1.2160787615632416 | 2.05007824266557 | 2.039144357340479 | 1.0946260808519817 | 1.8700371383721477 | 1.2261492732376515 | 1.4293348929347809 | 1.6778890288558517 | 1.2179064248031062 | 1.8457341197306292 | 1.0835031336720553 | 2.604486227889874 | 1.9306899385246659 | 1.4461834273198484 | Constant | 1.5486375029156014 |
| 2.418156261050857 | 2.2683442879909803 | 1.5630721717485623 | 1.1992285554964808 | 1.9527349119144237 | 1.8068938870088405 | 2.2248858096602038 | Constant | 1.4817838104045544 | 1.4930054917936666 | 1.9808493377913252 | 1.2860478648821991 | 1.1365000980394566 | 2.203468202826798 | 1.5920649892641638 | 2.156452981919309 | 1.03320198610802 | 1.9181110607876533 | 1.7234972058367417 | 1.3143122565477865 | 1.5096943723986171 | 3.0901277714718716 | 1.461554740509016 | 1.1310337910159114 | 1.32741340024483 | 2.0747127910931074 | 1.4512641674388471 | 1.840259410329772 | 1.3720984334850095 | 1.2356816909031432 | 1.1790836298789844 | 1.7681227366518701 | 1.6264116597465614 | 1.2062400355282288 | 2.986236256083223 | 1.0619123323412483 | 1.1874190851803952 | 1.1302101889646956 | Constant |
| 3.981629862068036 | 3.7048339921725835 | 2.5109642425485577 | Constant | 2.136882001513321 | 1.7939393394034993 | 2.5291917784725175 | Constant | 1.254470850253064 | 1.652200408522843 | 1.5306791916602767 | 1.5095578482943433 | Constant | 2.258068033812823 | Constant | 1.4275283571145005 | Constant | 1.444553972027922 | 1.416173536212909 | 1.280100372931034 | 2.3691833884520217 | 2.4496157164608685 | 1.5230647975840317 | 1.3632876815318575 | 1.4252405089981273 | 1.8874206903217916 | Constant | 1.9407916327459787 | 1.4579562805504287 | Constant | 4.728120352052499 | 1.2174459859231248 | 1.3053952806096407 | 1.2866867914956788 | 4.299811650100719 | Constant | Constant | Constant | 4.371313468027885 |
| 3.256266530368605 | 2.599097379770034 | 1.6847727901410465 | 1.1243518801718175 | 1.680428342753119 | 1.5625651633637343 | 2.771766186748579 | Constant | 1.7352176753453619 | 1.755189467614206 | 1.3777406510542092 | 1.2802490808410005 | Constant | 1.888777291110309 | 1.275934692027441 | 1.7115420851231078 | Constant | 1.598988325879555 | 1.670875357272872 | 1.4563919486414871 | 1.795888913744585 | 2.9920029531564576 | 1.455292376603924 | 2.0053622012608896 | 1.3491491542308438 | 2.2033718194912444 | 1.0881412286760421 | 1.7198972288559315 | 1.1966837811516726 | 1.2003812726474679 | 2.2856632083366035 | 2.250119141569452 | 2.18571938228599 | 1.5576861476393535 | 3.328508716951066 | 1.0926849362850373 | Constant | 1.088017406455896 | Constant |
| 3.6498678917059384 | 3.252506717711995 | 2.979577775466366 | 1.4981398539547048 | 2.1536759587498984 | 2.117571409343814 | 2.391962575753067 | Constant | 1.4081225999606075 | 2.443904994049607 | 3.3577525582174843 | 1.3497336243429277 | Constant | 3.0482680040943513 | Constant | 2.4672050646332817 | Constant | 2.436991090282553 | 1.8948577709559464 | 2.6677479739844583 | 3.717100172865397 | 1.7741469259409868 | 2.340916974681389 | 1.6962471089858533 | 1.846251186763147 | 1.6852021352473685 | 1.4330242242695528 | 1.7767200167098511 | 1.343270621352431 | 1.2219058669914884 | 1.6341731741135774 | 3.4099981649419218 | Constant | 1.2332927658744097 | Constant | Constant | 4.646305961942979 | 1.9020230062270151 | 3.085498169370939 |
| 2.602915077162356 | 2.6061119496643657 | 1.519422035519883 | 1.1860143202223357 | 2.1277303788588124 | 1.9920627699827063 | 3.145247856597091 | 1.7168648125860595 | 1.2348363956796191 | 1.7383920600058134 | 2.0020104111699943 | 1.2521058939482368 | 1.0575796340342245 | 2.141972374172782 | 1.1097658015916472 | 1.8898525564417246 | 1.021128321842956 | 1.5125933942822702 | 1.756549383657813 | 1.6111791141480762 | 2.3213186100295324 | 3.490503465806034 | 1.4201971822928423 | 1.3635650154175223 | 1.3826005176757536 | 1.8229978248371104 | 1.7483701140402719 | 1.5175807462853912 | 1.508811241078195 | 1.1694987714365197 | 2.0298866103825643 | 2.0672241284661577 | 2.226708830076934 | 1.3817283581257516 | 2.8093058453705373 | 1.20977460587006 | 3.1077113197084585 | 1.1041708386874114 | Constant |
| 3.805906010145103 | 4.809382432759391 | 2.677738545019494 | Constant | 2.4865603304300774 | 1.5481490701146254 | 3.1817364282832155 | Constant | 1.0845259533083889 | 1.5245468518127967 | 1.689270843611233 | 1.6050608477366535 | Constant | 2.0203931356032507 | 1.366307912436527 | 1.842383033753219 | 1.0244176646524443 | 1.507177902507829 | 1.9626874332862336 | 1.2530321074325839 | 2.220967487638717 | 2.4637926718570617 | 1.5324234640580898 | 1.4893044763761165 | 1.4381584407369419 | 1.862351447184979 | Constant | 1.8445088460783254 | 1.3213964139289147 | 1.1750126685871516 | Constant | 1.8758403406357087 | 1.7203879098228354 | 2.8656005652752783 | 4.339835624998598 | 1.0477816603046075 | Constant | 1.2815465709067766 | 3.513480756828371 |
Note: 'Constant' indicates that the feature was constant within that cluster.
import pandas as pd
# Pivot the long-format regression results into one wide row per cluster.
# Assumes results_pd holds one row per (cluster, feature) pair, with the
# cluster-level metrics repeated on every row of that cluster -- TODO confirm
# against the cell that builds results_pd.
metric_columns = ['intercept', 'mse', 'r2', 'adjusted_r2', 'row_count']

# Create a list to store each cluster's data
combined_results_list = []

# One groupby pass replaces re-filtering the whole frame once per cluster.
# sort=False yields clusters in order of first appearance, which is the same
# order results_pd['cluster'].unique() would give, so row order is unchanged.
# Rows whose cluster label is NaN are skipped by groupby instead of failing
# on an empty per-cluster subset.
for cluster, cluster_results in results_pd.groupby('cluster', sort=False):
    # Cluster-level metrics are read from the first row of the group
    cluster_data = {'cluster': cluster}
    for metric in metric_columns:
        cluster_data[metric] = cluster_results[metric].iloc[0]

    # Add the coefficients, keyed by feature name, in their original row order
    cluster_data.update(
        zip(cluster_results['feature'], cluster_results['coefficient'])
    )

    # Append the dictionary to the list
    combined_results_list.append(cluster_data)

# Convert the list of dictionaries to a DataFrame; a feature that has no row
# for a given cluster ends up as NaN in that cluster's row
combined_results = pd.DataFrame(combined_results_list)

# Set the cluster column as the index
combined_results.set_index('cluster', inplace=True)

# Display the combined results
print("Combined Results Table:")
display(combined_results)

# Optional: Save to CSV (path is relative to the current working directory)
combined_results.to_csv('combined_results_by_cluster.csv')
Combined Results Table:
| intercept | mse | r2 | adjusted_r2 | row_count | latitude | longitude | has_Tennis | has_Parking | has_Alarm | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | bathrooms | bedrooms | scaled_square_feet | week_1 | week_2 | week_4 | week_3 | has_Golf |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| -127855.00168877741 | 1849043.8334700346 | 0.6105100713030106 | 0.6072040085909691 | 4397 | -247.74335445340458 | -1149.9694917920797 | 26.878714865526863 | -17.92828290721685 | -439.26816714293295 | 280.4312786674494 | -56.86077813483782 | -128.58634532080825 | 49.902412869435736 | -111.92675217115328 | 26.719641482832643 | -264.3020048784857 | 248.56365462463296 | -142.5783903555907 | 242.81779873197542 | 222.48960483783654 | 351.23578532186315 | -69.63204796105116 | 21.028659498150024 | -34.58972838270154 | 1778.7385313485897 | -168.95148308722057 | 17.489561350725868 | -7.775955952677445 | -296.48806302084824 | 636.488718149034 | -168.87766162797365 | -17.110676662069096 | 63.012089056794366 | -45.78148282637764 | 298.00692137432816 | 776.5531355747135 | -271.61101464536426 | 986.0876347900331 | -131.93238834623406 | -708.9194586045452 | -371.16525290589647 | null | null |
| -5068.981980932094 | 62606.5545276049 | 0.6074576652344965 | 0.5876100190946676 | 749 | -290.0522563768183 | -139.52568909639223 | 43.47608767285304 | 31.05213526835461 | 27.61375087726749 | -32.49669320194862 | 73.15336168802831 | 44.57785590707506 | -33.32375163278415 | -73.6759933814315 | 120.8872980942828 | -25.730952879459952 | 50.645735092697244 | 117.61486299506952 | -25.432871135755338 | -14.5146117997188 | 711.5047542886936 | 109.25977904430509 | 51.145623429501136 | -5.886668322037511 | null | 24.212583167414003 | 107.75997286996176 | -78.01440322260069 | -36.551375794745546 | -208.30970783903626 | -89.5548045841247 | -11.550000438109421 | 38.18091465655659 | 11.031244157526158 | 58.47329916289276 | 212.03186825249801 | -79.7907113054889 | 247.7866121926943 | 137.85856611485303 | -320.3354494301346 | null | 100.14794238989678 | null |
| 1193.6600269323744 | 37715.09578971941 | 0.5788756843542795 | 0.5536230798887514 | 602 | -64.68563672126847 | -14.950512997453888 | -15.723407905761821 | 85.6693747941552 | null | -71.40195643961448 | 9.36080427114875 | -77.718157837365 | -3.84696940130162 | 21.139151913644284 | 43.98189257909877 | 61.84166915477438 | 79.19600345877856 | 19.81270680069636 | 2.358233832171756 | -15.219874830637846 | 272.56413772300334 | 186.11861236464355 | 1.1100496173768164 | 7.9992683806744695 | null | -61.868463517560244 | -13.89162873228789 | -48.76816202129029 | -123.36714807573117 | null | -12.359942644868646 | 39.07586445182439 | 120.65031391022946 | 129.98192093126423 | 115.8760905894438 | 42.441056145559564 | 24.07094716121875 | 174.92082829063523 | 173.58777632412978 | null | 220.20409862130663 | 9.65413841964755 | null |
| -263846.43893598486 | 2251678.5538327172 | 0.5093369571263023 | 0.48101475340348354 | 679 | -2590.291689070392 | -4059.7187709964696 | 686.0977252917683 | -140.93479266716324 | -1454.0941111393681 | 430.6747646730339 | -60.057967666549615 | -656.544585121958 | 72.4357647115488 | -635.9224235746592 | 56.74497031906794 | -333.804514022356 | 509.00748501059917 | -636.1202349223488 | 29.185229925518478 | 287.92614129280474 | 74.49364464081006 | -592.500675794562 | 138.43417294147037 | -558.7738356926156 | null | -26.907470524325184 | -146.6312077483235 | 119.52644402234395 | -802.9846734662495 | 750.7052912002545 | 35.57799596632853 | 5795.457881044483 | -346.8581323066133 | -338.92041480217114 | 34.91883921678576 | -53.975796766535396 | -40.96458078486021 | 825.6423132940197 | 652.7509762513384 | -1439.4374615524994 | null | -371.2891727200766 | 1559.3943863547863 |
| 4044.5581003572042 | 26833.93966146942 | 0.6173742372465155 | 0.6046553753267321 | 1120 | -18.337100565156618 | 31.541352899957218 | -13.994645791805858 | 57.106376167142805 | null | -47.22830001823652 | 33.66659307406798 | -65.83067599553048 | 21.402016169170025 | 46.34523570716006 | 68.92569917792453 | 188.26596826686722 | -9.992078627668675 | -35.9066368717289 | 3.3860012781204154 | -143.918867471183 | 78.64769877619648 | 144.98502659220807 | -24.140228508535177 | 14.920365822399603 | null | -35.006950389720245 | 47.422257579712294 | -6.1755598711497806 | -73.91846559243284 | 59.4591759120248 | -91.85852578851761 | 15.885560930695526 | -1.090400381616355 | 20.604818011831117 | -1.1075864223272593 | 137.7309991964705 | -33.64456903591675 | 207.58847706895205 | -251.97203070117544 | -15.453482570426681 | null | 132.14487897008684 | -5.624342966347403 |
| 11039.86353498449 | 23716.220990500115 | 0.8393843166510828 | 0.8252877274209917 | 410 | 24.484436665550056 | 124.31503642031353 | -1.15512798305883 | -37.553238372566554 | null | 48.711510070391135 | 107.5773739953951 | -96.23206701606146 | -61.32849366380584 | -34.319346879698934 | -17.43957445461286 | -4.290532016485312 | -167.34347978035117 | 16.874702016054094 | 69.2433975786761 | null | 55.55620266468918 | 40.33352819807264 | 40.473536771349984 | 44.82330793028684 | null | -24.57105992420894 | 72.46506455070752 | -7.847408830123712 | -28.976933855321718 | 146.0584674289057 | -17.665002186663116 | -66.2725198082576 | -101.89726611003604 | -63.223590009693396 | 91.29494409499446 | 159.96033889138295 | 18.215803876292537 | 185.561874969594 | null | -145.30265858365541 | null | -437.5496918329505 | null |
| -29325.66692899796 | 64063.53799535754 | 0.5097568224691791 | 0.49181466096643645 | 964 | 368.02098579405873 | -169.00160005613728 | -7.958799525402349 | 150.16300513757588 | -84.73167425639029 | -14.326475640146581 | -19.005652160864006 | -64.93884450295144 | 50.778588890146146 | 13.637131432018123 | 344.17503773619126 | 160.47797071338348 | 253.46181258160442 | 186.20157275994245 | -11.573857140787712 | null | -16.71163670994703 | -297.66410899852013 | -44.27171063131728 | 193.4325459911289 | null | 30.997638094400287 | 235.07691689911252 | -65.6998083931402 | -135.5330945015223 | 463.34415067151457 | -184.50190966904756 | -110.25933865863952 | 76.8790044056339 | 41.19494081236701 | -1.2494231052377622 | -170.47376877333585 | 29.994665216200058 | 257.3462917735648 | null | -8.517293932569864 | -47.73447832307698 | null | null |
| 92559.24825598963 | 528643.6283043601 | 0.450272776159798 | 0.4447769892692043 | 3739 | 35.0641930276948 | 1257.968725626444 | -61.22328110369451 | 27.438560087558596 | -77.77867874746664 | 125.87970173446817 | -223.07938094506974 | -115.75316348006673 | -157.34632453542997 | -153.68373306734247 | 135.7028828493121 | -313.2731617636021 | 1.3468059809229391 | -48.57915479449336 | 107.56811247263936 | 323.6778810916921 | -75.35871613884042 | 4.585270190348002 | 237.12111439726405 | -17.336233813008985 | 822.1184121575508 | 176.3549077485661 | -17.788446155953284 | 200.87585590928904 | 13.357643064258996 | 235.95788369423593 | -76.26081423255539 | 148.22275528148725 | 49.43845357746455 | -24.56435727511791 | 226.10037087956388 | 678.8096540662162 | 118.61781347714079 | 55.97787712178474 | 787.4843293653789 | -41.83390121975815 | -67.32654818741617 | null | null |
| 16453.505092533036 | 150688.65937530802 | 0.5036166784693006 | 0.4965395301227242 | 2562 | 310.34418964199665 | 249.45499317868678 | 74.49233567147385 | -31.69703425286225 | null | -12.166823810809221 | 57.33309089630378 | -125.77436910095471 | 217.13709546144764 | -138.41410978874282 | -54.211757352622584 | -119.62494092384581 | -175.77969343427895 | -24.719449979026308 | 26.295376603315116 | 188.6483607939574 | -115.02418945214264 | -49.00959696904132 | 50.25481837225918 | 156.22864115188804 | null | -123.05484080199962 | -6.636129662881369 | -13.171810127073782 | -90.83875371032256 | -157.51195277012513 | 530.0671208989812 | -77.76033314749358 | 103.9041092883413 | 10.036071450211592 | 131.64567564253576 | 186.95202642409433 | -94.29152355754502 | 376.6493976758226 | -230.8068613669354 | -284.81858247603157 | 44.86029212331485 | null | 214.27883100731992 |
| 9668.062412212703 | 85585.89904218251 | 0.6337659455703213 | 0.6078782993112588 | 516 | -87.22569937046698 | 67.60319298910349 | -67.79796305608309 | 116.43729264076882 | 131.46771654831286 | 346.80564020959594 | -222.52387488709115 | 88.77091616405022 | -96.46008028103017 | -92.38780795513706 | 30.886439583090482 | 147.98750995918795 | 133.43618810641544 | 10.428920482981338 | 96.3654573329995 | 325.6431015775536 | 228.9826103445607 | 107.8386282316019 | -6.480966707713934 | 48.35674835515252 | -154.6028537351413 | -50.550505956193405 | -48.5324236520094 | -21.977610857017194 | 105.854561069028 | null | -93.89022895607897 | -99.69307955477058 | -139.7662676472409 | 63.0373523622081 | 7.651626271376036 | 149.65594056219243 | -19.761643368800637 | 259.7103990968094 | null | null | null | 0.3448456984993125 | null |
| -61167.46591885238 | 74242.4002257599 | 0.6938263919705878 | 0.5939871719609968 | 123 | -303.66847592003603 | -610.5929872358149 | -88.70698645992778 | -140.28520335707634 | null | -384.53796927407603 | 108.99077374202825 | -30.843363733502056 | -79.08651390075497 | 221.48034328541624 | 364.932765935026 | -114.54205968234837 | 6.770874411807944 | -31.666810980707837 | -184.18274811921617 | null | null | 441.12205179616035 | -34.07028407427322 | 209.43239757104467 | null | 224.9283374030274 | -8.620649512490886 | -136.7726787665043 | -284.0859524581558 | null | -180.10996426366006 | 126.83550706360425 | -181.3528019794819 | 132.54800013795915 | -81.26963290031074 | 151.74695309744823 | 70.54866532992531 | null | null | null | -224.48625177721388 | 97.27759226885229 | null |
| -180006.84738826434 | 592664.1799995424 | 0.5989894372154505 | 0.58372139787091 | 928 | -1683.1995813740327 | -2002.130850480888 | 151.86773522059588 | 10.77758422140065 | 7376.13951520209 | -118.12936531888757 | 23.477112911451908 | 215.9650490588718 | 195.17615713017096 | -70.39059222099891 | 163.41864397869963 | -149.245054914803 | -72.48919216732278 | -272.0501175862376 | 85.74590284312059 | 132.91207367196253 | 241.39719124905534 | -333.3077048324803 | 2.58505778434858 | 136.12896235054956 | null | 108.86168874427796 | 15.506316849891535 | 7.458357342583648 | -249.11224782743597 | null | -96.83340031721718 | -149.77445114272209 | -295.71323476118613 | -93.11816232053809 | 259.8798807967962 | 182.7968027694712 | 0.4743602342892374 | 581.5331269201703 | -409.84754346678676 | null | null | 357.1205609313438 | null |
| -23257.592395689935 | 84702.7465113301 | 0.4980339711720603 | 0.4896562349468374 | 2194 | -185.99342096382225 | -364.17219613921554 | -33.821251971146324 | 64.38338089323014 | 107.32362376641339 | -61.43562138640965 | -52.09488948516729 | -125.27318362329552 | 77.50521491063735 | 75.825175160087 | 46.6190319805352 | 120.1012711071601 | -100.68705731686772 | -45.3042979456795 | 40.66135508575728 | 216.8051729163172 | 304.16221458304125 | 243.79690410298937 | 117.68070718835003 | -7.6410786875973145 | null | -26.93313713779452 | -105.14122901954904 | -14.655403122427474 | 28.344651079347123 | 62.59668246194059 | -59.98622356178887 | -63.48008156162611 | -17.039116715259464 | 6.23461160596018 | 152.04841786347387 | 176.12743861908692 | -65.20633037270035 | 259.5019027925151 | -618.8474848238963 | -176.89423955973055 | -271.4205302989947 | null | null |
| 6012.107026925807 | 86079.91839470032 | 0.7184387530679226 | 0.6848953554189747 | 311 | 119.82203155765015 | 87.04996143775608 | -120.91360981524281 | -82.8153983069178 | null | -84.12208206650389 | -25.719034617921096 | -126.74359078848154 | -89.99150781114581 | -52.647423987936556 | null | 7.604131154234069 | 69.63125294760033 | -131.54058793048935 | 148.69458771388793 | 133.94886018276154 | 184.75962020964553 | -67.14243935858343 | -23.692212375463708 | 73.57913440889925 | null | 68.11087854538195 | -197.10798303254734 | -106.31442607365466 | 26.745247237841028 | null | 95.82123834942317 | -37.53471815291842 | -277.35995739390324 | 109.85540365728865 | -4.193938088928431 | 108.39457165501993 | -68.7144973433553 | 294.5075020051257 | null | 16.08791012337649 | null | -79.27035611565441 | 205.36126532785812 |
| -16995.752553157756 | 87758.78418972049 | 0.5259620798479592 | 0.5236666469143607 | 7679 | 194.955020341281 | -114.02816715960445 | -49.31290002864634 | 56.04558913692297 | 6.431637540880135 | 91.39987079923625 | -44.99237041765729 | -108.87833352704908 | 77.19875210262927 | -20.78519758169586 | 20.533373437391006 | 181.75538684037974 | 59.12404275091288 | 39.49748899045819 | -3.7332649981819364 | 99.04608592959241 | 134.1957385587588 | -5.935189595529877 | -41.94818195266815 | 5.966523037731125 | 388.5981347396176 | -157.3578186804136 | -21.90532103264095 | 22.75324610179111 | -32.276315023770564 | 29.314990347780682 | -33.129418646347375 | -103.88924043626325 | 35.009996789634926 | -0.5992959132115635 | 10.322892596851236 | 120.32814774418037 | -92.81846103288413 | 329.9119581438271 | -141.01346529728673 | -195.17350197807278 | -13.466307777521719 | null | null |
| 24895.940243424604 | 352553.0550753856 | 0.425331480605888 | 0.4215290422048822 | 5782 | -200.46418993331463 | 221.84378575756338 | -219.43837974958353 | -2.609730427538231 | -944.9467973442524 | -44.6401625713533 | -77.32519088157575 | -136.7917717077677 | -37.867701053984675 | -31.6919076606311 | 182.89003762337381 | 28.60843376674426 | 41.12241244716384 | 116.71600059170937 | -31.141628065052497 | -161.51430963195412 | 340.6215960427303 | -159.07893672404506 | 181.41189323395812 | 59.354492194751884 | 1489.8022701340574 | 24.437849459816572 | -39.93280617408875 | 121.82502692448486 | 104.21154862264076 | 324.8178881327685 | -124.3365695896661 | 73.64460411414917 | -62.68914872770554 | -103.86842345357242 | 273.31839960227666 | 380.7852542993411 | 66.31949691500238 | 230.8263068464096 | 549.0979090555306 | -70.72045749813721 | -152.739334086386 | null | -1003.8367850951738 |
| -608.7960785441228 | 18108.12640907924 | 0.7885462465004393 | 0.7659481354394175 | 291 | 6.953038931083352 | -4.818250851030314 | -303.76597346376855 | 71.88200311497046 | null | 28.918461349577534 | 140.73108883500953 | null | -4.1545191991286075 | 110.60721318121347 | 38.74595804465225 | -52.4094349057115 | -26.92922368437239 | -183.1257536563169 | -15.910475906885573 | -19.710554989365495 | 209.55162259306223 | null | null | -53.13079342947595 | null | -147.6451061399629 | 88.64426854747802 | 210.78423034938848 | -734.2944201932002 | null | -213.2597778260777 | -229.11847303960175 | null | null | 271.3978139937758 | 32.642806686630806 | 51.51212739298165 | 248.75655656363386 | null | null | -45.65958305224388 | -436.00385788089653 | null |
| 3996.3280574027003 | 19762.91358457021 | 0.8855100264589318 | 0.8135449002331175 | 58 | 84.114462472095 | 67.82335140325192 | null | -104.8061739031535 | null | null | -133.7657970910105 | 135.78825475787332 | -97.99840434408993 | null | 42.054965898294675 | null | 243.97431674876586 | null | 174.33161420062262 | 841.4172223128471 | null | -62.95980204715795 | null | null | null | 136.30751281265836 | null | -217.4752372341451 | null | null | -277.20576498331246 | 482.7100315066943 | 28.021660929083453 | -89.79455970624075 | 115.11983851740511 | 35.20108298189241 | -177.6355387170641 | 429.12623301843496 | null | null | -43.36512732172825 | null | null |
| -3081.243951074328 | 18040.59695370475 | 0.8303296471012408 | 0.7384248726144129 | 75 | 75.75764838222656 | 0.34591090899183286 | null | -169.3571637683754 | null | 50.01503855503857 | null | -85.81541216303773 | -42.256653218339224 | -90.87499255627863 | -0.23425724075599821 | -87.93564474924786 | null | 421.5006626581512 | 103.93497132707171 | -171.46237290404719 | null | null | 138.88907657879136 | 26.91821573903596 | null | -152.9623934947496 | 48.49526757963568 | 61.23764642594715 | 102.9602401099396 | null | null | -434.8577604006764 | 26.84451687295537 | -58.05995534082977 | 95.71952173502302 | 69.01009571478376 | 74.18688556792297 | 153.39557068171067 | null | null | null | 28.221591233589788 | null |
| 14671.25676117024 | 65680.31800295657 | 0.5460274461559416 | 0.5329863091278126 | 1326 | -118.09568489818473 | 131.20632881799136 | -63.15530016086847 | 95.17579462668866 | -69.11442893150173 | 225.31863817621792 | 56.577802794339085 | 18.588683688195204 | -65.43175354828827 | -44.16724722182292 | 12.06026080643616 | 53.17836067039356 | 30.914161312229414 | 25.364661968164036 | 24.242533160434554 | -3.0741777962923225 | -15.426658285991678 | 109.49728329961675 | -26.17195586902363 | 34.114051997594395 | -77.56781339080983 | -38.98599713343423 | -52.16267963269723 | -0.8318097454418678 | -51.18396270477531 | 104.79702812959114 | -183.34051273555434 | 48.383510100756375 | -82.63946704374918 | -62.57237813099958 | 110.9889624377582 | 135.3311795824774 | -21.969779040791664 | 204.2020233049446 | null | -362.0883849163261 | -175.64104319399772 | -34.85472661665536 | null |
| -30615.053050912626 | 191282.2930438603 | 0.4474289349113628 | 0.44408588436632734 | 6320 | 194.58853701807857 | -219.27263897411314 | -124.8280557549765 | -32.5298042569493 | -1029.137278786393 | 131.51904945559323 | -24.19172540545493 | -109.43630130796986 | -1.1756584255734042 | -38.32996850139801 | 81.46372495985955 | 14.373131652103732 | 17.02340563332121 | 133.743129349206 | -8.857585367656052 | 95.74327066855041 | 208.7591937064857 | 17.563582326290163 | 41.68905054718938 | 140.92379318222572 | 1158.0593060346505 | 59.400877794339195 | -0.7777792568137267 | -73.27629492226032 | -101.4447191687666 | 41.54952145501563 | -174.2411587019718 | 30.765283111730767 | 52.08148557969623 | 9.362368960955576 | 302.42865566474995 | 112.50325430040922 | -62.93045180814115 | 429.26195802324986 | -326.8534221174483 | -319.30261540569785 | 106.81704131731165 | null | 30.17979474318825 |
| -7854.169122921872 | 45150.060127651246 | 0.8002200843845761 | 0.7023686971443686 | 74 | -136.05222240418541 | -126.84368810236342 | -701.9676039056918 | 273.5475717378171 | null | null | 300.8671009843997 | -396.45299943412994 | null | null | 206.81063712580627 | -266.4090226240954 | null | -514.2077251056334 | 176.10389783112026 | null | 57.72779616907995 | 483.6657644822021 | null | -32.11288518715441 | null | null | 204.74980872014223 | -226.18261476882117 | -200.3343809631995 | null | 194.27770385114633 | 126.85903459578421 | null | null | -54.230232775992405 | 207.1562019462489 | 122.3889671273643 | null | 21.427362993212533 | 156.5209878807991 | null | 312.07638928590205 | null |
| 3032.0249267044746 | 165738.23347408537 | 0.4891064660019505 | 0.47223733987937344 | 1096 | -24.190803256989795 | 22.39213185560908 | -7.845285482146391 | 70.76512245057583 | 996.202558910742 | -112.01611929806975 | -86.40263795606204 | -51.449246227729255 | -9.003635130255448 | 48.19902362448918 | 19.094060268044057 | 193.84242691100948 | -68.0312285959456 | -10.095902452677588 | 61.69949566246301 | -294.5673585478298 | 788.8866606468916 | 85.8698223528604 | 35.9735312396012 | -72.72970175125054 | null | 137.59969288259936 | 33.49569284483767 | -54.26367716823941 | -214.90639354001826 | null | -95.22944975852776 | -35.93676222939371 | 118.6829052788152 | 53.27775644531166 | -126.49730931284297 | 297.3065691412257 | -3.040757904128354 | 202.14458562509282 | 207.77154031510793 | 177.30024899671227 | 209.07701641345682 | null | null |
| 31257.96396741478 | 101247.17185797503 | 0.4992427041675602 | 0.4733786257601459 | 734 | -115.11448111704476 | 329.7514798406049 | -1.3873660355409823 | 85.78257609992698 | -363.0232158785785 | 559.4018246437148 | -50.03191104712606 | 27.094624033213645 | -145.05254157216078 | -51.25133819386554 | 41.38691462187158 | 235.44690741773448 | 131.0847971825296 | 184.81277124996924 | -74.59319529226626 | null | 61.476758916369256 | -37.26694278452037 | 156.07533981596146 | -20.55148865330696 | 279.1585358498364 | -16.731182693235766 | 158.34185485301737 | -85.319591694487 | 7.866195854852455 | null | -89.71085979936935 | -29.19134722346237 | 129.1682021726421 | 95.18166228217474 | 56.57761956134036 | 229.91341538053834 | 6.571820140119768 | 153.60187195578035 | 119.00132547167723 | 47.654157753268855 | 355.32630294098095 | null | 1284.161675661168 |
| 6142.2053572210625 | 82658.65230649039 | 0.4324116264959923 | 0.41615382978231386 | 1222 | -40.68291514022358 | 51.74555202993294 | -59.05710124708211 | 90.8578022814816 | 316.0769639540641 | 18.68476363964589 | 65.2366272444654 | -200.33618157957818 | 22.435095680249564 | 66.9089516374852 | 176.9168399424788 | -4.660180733599853 | -28.473950081707596 | 184.69601277921976 | 10.731625392976504 | null | 594.2853192722105 | 80.36897566971552 | -10.73342498209394 | -23.137224407738596 | null | -145.1770093812542 | 9.498880219255737 | 8.078997663801673 | 61.97023313686005 | 198.0649637597132 | -26.70384294870644 | 56.97591929372846 | 36.240208456525544 | 4.715627990132369 | 102.37550576399178 | 89.26581810719998 | -96.07550741818837 | 293.67890504174585 | null | -161.36999344072842 | null | -198.2520571323649 | null |
| -27996.505774520843 | 43819.86678339753 | 0.6836093146526023 | 0.5448414702019894 | 83 | 146.7669605398766 | -192.25802098155438 | null | -37.913037716492056 | null | 3.7669269304666813 | -75.21681367798003 | 266.4134428988489 | -195.06000159835386 | -350.7107940772106 | 185.24702811016493 | null | null | 79.03923012937774 | 277.09417433352195 | 286.58838372997013 | null | null | 95.1996921677725 | 14.351158415801581 | null | null | -153.47462944516636 | null | 240.58204606353547 | null | 314.7810073184359 | -41.55035110499555 | null | 115.72541676238077 | -277.46363227975564 | 300.1121635417133 | 23.041692688710867 | null | null | 84.8837498277975 | 213.10460655657775 | 322.0993393971258 | null |
| 187.7934651530643 | 66497.99324651515 | 0.6854814541123011 | 0.6712221936122111 | 808 | -93.12639520198636 | -38.67280339515656 | -73.24849151812116 | -27.989679269085947 | null | 24.401812106082076 | 371.10221938395034 | -231.24820672205706 | -41.75207460588194 | -72.20848311797378 | -143.00725438664253 | -79.14068772980686 | 249.1884505429181 | -50.62987219901378 | -34.90749414998128 | -201.8413113479131 | 252.80864194755043 | -129.38867588473056 | -166.10350128952552 | -132.01158490205282 | null | -171.61399002089055 | 7.83847224259303 | -55.81762261130847 | 44.57385137274664 | null | -104.63388845083337 | -56.780055780624004 | -23.195150266464363 | -30.048437422490593 | 36.76948344087037 | 108.73136840647882 | -148.3012013228779 | 303.50508481220356 | -116.62402372341943 | null | -147.26220450675828 | -103.79772991830163 | 711.4694095566483 |
| 8025.502776188111 | 31064.38427727123 | 0.7208241974514662 | 0.6917617322019343 | 351 | -145.0298509289631 | 18.59697111344075 | -102.44286914288581 | 98.73401267121268 | null | 119.94082510239193 | 83.59416000930153 | -55.38035695224312 | -102.94935571500281 | -51.31375760000248 | 78.50379422980483 | 365.1318354315521 | -60.51710351349428 | -134.4128311051792 | 75.35479807808018 | 44.135094079948516 | 120.4034602402705 | -27.207452330997572 | 50.52429902485757 | -15.077127543839264 | null | -34.50923487065684 | 0.21654648401348697 | -4.587419499994698 | 48.07732668923542 | -97.65123666035034 | -65.7436037172636 | 49.13222321362212 | null | null | 38.123771298142046 | 113.85140244920004 | 2.925831731633209 | 210.9051232948858 | null | null | 11.979695713433555 | -0.09084034620979467 | 59.840409228917885 |
| 9303.281001631884 | 342705.12537578517 | 0.4619059102976184 | 0.44901742910714226 | 1540 | 13.487314309174867 | 110.54062020297023 | -275.7685583341575 | 156.5414335937323 | null | 268.28007769363614 | -248.54029046984655 | -272.4094797197432 | -238.53181882052982 | -0.4881211054426734 | 142.3508718147101 | -9.016692229864113 | 267.7095504547096 | 38.10235718229619 | 115.55059032537551 | 164.4733904627463 | 484.788943441959 | 147.62183546356746 | 275.57789845167855 | 0.6306874345813488 | 52.963754151981256 | -15.565720659134197 | 85.62588261701696 | -9.551233473954115 | -23.49735139812597 | 179.83071187737795 | -86.36487173697863 | -19.719326119686357 | -408.49469195532237 | null | 20.835756192426043 | 219.5823675307377 | 47.38869189179017 | 326.97264638366727 | -77.19644581155485 | -26.787219493334995 | null | 63.9419604075221 | 269.7399603984487 |
| -9259.249797416009 | 23268.871312906875 | 0.751102512659515 | 0.7365329036444622 | 652 | 211.114907779793 | -23.92897072224075 | -63.38064184269281 | 17.16780182569347 | 251.50523713294965 | -125.1344481074784 | 22.8737975041749 | -48.65801123510177 | -36.49289694155663 | 66.87061971754764 | 138.93557333865766 | 94.02102096352023 | -2.4078187089201055 | 56.08485206255919 | -34.17735047651874 | 2.1821815843720676 | -78.48956105797069 | -1.4354581090104401 | 5.668773525287247 | -45.628011172098454 | null | -29.560554815387512 | 44.6047431745969 | -10.701782474427814 | 113.84189175222005 | 129.2403859232075 | -20.818652515642956 | -1.1191985881132127 | -109.42262363785515 | -55.14942632432474 | 93.10941928689174 | 36.868457410908796 | -22.160822253174317 | 277.61415416317163 | -222.17258735725153 | 51.400069980289985 | null | -53.745017820745836 | null |
| -30332.032728297258 | 2076224.07789773 | 0.1096218425372979 | 0.0824116441768078 | 1215 | 264.6322748719414 | -188.19816025215934 | 73.14077055465641 | -101.14640249643284 | -217.85324086477013 | 195.42872700742114 | -34.842570866614466 | -238.04744296733313 | -18.368029215260925 | 580.316439634901 | -124.03422938626666 | -252.26796556217988 | -185.8134273835693 | -154.8893476265259 | 895.0380917912986 | -82.451183159893 | -563.4620890587103 | -275.18644543714623 | 100.77990522323705 | -320.7531309598197 | null | -370.74609879859923 | 21.468518542995447 | 220.08313055059608 | 49.490771787953406 | 356.82787386107543 | 131.74582046622928 | -28.876448453122922 | -170.93854623051132 | -36.566119406681715 | 190.81851333435776 | 269.2588334924233 | -473.74524413591814 | 475.7700574842494 | null | 143.27471059679982 | 31.659849642138436 | -266.38856949071345 | null |
| -6458.8939283822165 | 41091.53392444322 | 0.7303691864941017 | 0.6921811313623649 | 267 | 31.27284644761019 | -57.556364691470435 | 98.47102450449803 | 87.74375390246922 | null | -111.6656286316492 | 118.5042261315421 | -2.6020483064928013 | 30.9436795388657 | 202.98194084846907 | -34.296747120315615 | 280.17423412880987 | -111.22440681982394 | 21.44949018167349 | -134.0888478284385 | null | 74.69840964717638 | -146.71597576941252 | 29.949982225898534 | 17.38282228856936 | null | 1.314432057731111 | 175.95046566952524 | 6.197849419984682 | -104.52331731289512 | null | -129.95315941925278 | 19.392350480097033 | -80.37630076977145 | -148.29534857931515 | 100.82951787865599 | 99.56754523585414 | -7.408181543644204 | 212.00360346566777 | -136.5308561911713 | 32.65715690208003 | null | 66.94679306137748 | null |
| -28857.97418663408 | 12700.628044016972 | 0.8328698959334698 | 0.8056626696900813 | 201 | 460.3328095463925 | -112.3671717534905 | -7.99951899370306 | 63.683486682487 | null | 204.90999835812846 | null | null | -76.71625472243852 | -120.9313648602805 | 121.05601377686887 | null | 17.337853651874216 | 55.279833564420755 | 89.03907635023197 | -15.99585288940498 | 51.72138121057969 | -57.58365533006031 | null | -33.679257079332395 | null | 16.737879796711727 | -14.8348652016683 | -28.93124662859309 | null | -13.69087904075076 | -35.22751443228346 | 138.4012693209598 | -34.94317596056046 | -36.1914598454928 | 30.28774902390463 | 165.98352645448347 | 157.01594115184326 | null | null | null | 231.9714351375765 | 254.26625745753842 | null |
| 29927.576848265708 | 67998.78869650172 | 0.5923916033488392 | 0.5534627115338407 | 391 | -129.2572778404323 | 310.33847603808897 | -9.279415572668427 | 53.518205457220226 | 252.72025770568345 | 135.49881590907998 | 40.44308065477281 | -87.21958961346094 | -84.7756838038254 | 60.417689084485076 | 61.899285782167595 | 34.64847510894746 | 142.89384392273843 | 144.0632104591275 | -32.39694757394898 | -41.70465463084083 | 217.37539606057874 | -125.87817011993006 | -107.45184437043918 | 227.875079342098 | null | 107.40916804558208 | -48.06299532554152 | -75.75435470033362 | -315.83709879125473 | -357.85691748209706 | -190.90633953448756 | 25.276423494439065 | -39.22743267365039 | null | -188.11125844724276 | 316.73824092854824 | 90.51959394687195 | null | null | -37.090035063242325 | 108.1541762819505 | -175.39888517637593 | null |
| 6747.194051885221 | 368750.7346612116 | 0.394857891502164 | 0.26555402216501955 | 143 | -274.2424273812633 | -78.8099587658808 | null | 23.23074453017479 | null | -288.06189596435627 | null | null | -437.0392939306644 | 161.88858777523114 | 55.177922629179534 | null | -270.86243726322164 | 828.2079231407806 | -36.509993727885885 | null | null | -370.06686387998224 | 692.4028723644346 | -45.53583899811734 | null | 148.78872311994522 | -140.8373931737015 | -272.9704898646601 | null | null | 395.5944204558573 | -269.6617452316264 | 942.3157316637095 | 19.593554664592265 | 178.92381540725296 | 454.294232821492 | 274.2498174108291 | -76.82936750018803 | null | null | -187.43701408411886 | null | null |
| 20404.825287128406 | 55978.121967194165 | 0.5788327066956874 | 0.5406644207399841 | 350 | 8.5715003314492 | 246.8641014797283 | -32.11794070249851 | -15.474037206991722 | null | -2.8842652547507535 | 14.025558354386042 | -134.02994120859384 | -137.92731295699326 | -118.4794155865984 | 87.7467586464894 | -86.71150061759855 | 51.24317401778327 | 106.20274386297797 | 101.02052642704834 | null | -93.96144970536378 | null | -123.40726219723115 | 106.80666301972413 | null | 1.6805930026255604 | -39.0487573220465 | 42.1609984550504 | 297.9093158611379 | null | -63.64947000122939 | 6.1563090801360785 | null | -275.79720983142 | -73.62274338360383 | -37.43139608363094 | 54.647156541792114 | 268.2711872532876 | null | null | -9.821855083422458 | null | null |
| -14855.705798025252 | 260002.56084018384 | 0.36846320282734424 | 0.33761226733327776 | 731 | 101.0267606132773 | -138.43077668323377 | -162.42215355923585 | 147.35409205985917 | -253.6966184196842 | 13.410653299578506 | -93.77923761106112 | -160.5183910346149 | -14.37185795416653 | 14.209871807437507 | 193.11322517060992 | 67.58293905862375 | 38.67802380456895 | 122.3092384376552 | 126.01791597912711 | 76.51100569450688 | 488.10362114640907 | 11.88243252429822 | 75.51131590224834 | 42.662405114584196 | null | 5.735524862540895 | -121.38594655489524 | 10.531099776674122 | 93.8388907228085 | null | -85.37097861486075 | -28.264692942833506 | -216.37065628948625 | null | -269.2785009873948 | 169.14587207039395 | -205.7058580281176 | 321.40675731673866 | -258.44862491881497 | null | -135.6295990377631 | 778.5230945519762 | null |
| -2601.160331479661 | 111943.27677127828 | 0.6259311797184595 | 0.6009005523390255 | 575 | -14.420048358826417 | -38.049162118015516 | -121.6751880949043 | 67.23682378601418 | -140.77980352683454 | 216.23810534222946 | -28.413481447501542 | -251.5473750209607 | -31.87419839189405 | -64.63383997173713 | 236.02173824695123 | -63.29814048519014 | 90.64035107832916 | -59.639435479446604 | 54.222255166521755 | 47.445471860762204 | -89.40957080081249 | 126.51752895995408 | -44.6212357552594 | 8.705121595595221 | null | 41.41920128594119 | -42.46839719139744 | 108.41612236389132 | 82.42634596507212 | -22.41081620872102 | -130.32610005850083 | -16.274998290328966 | -266.39410690203067 | null | 104.99085957230302 | 275.772534852302 | 37.79752357143113 | 232.34654477428998 | -331.6109677118007 | null | -57.02662149183304 | 9.293048677031786 | -80.77129599340417 |
| -2291.0158803347786 | 148537.6599512283 | 0.5164347340932904 | 0.49145862279825236 | 734 | -149.8063250624696 | -116.40532041088544 | -68.50957301003274 | 14.412605642678693 | null | -16.10131066688136 | -84.99703726746205 | -139.17014842782248 | 21.818564091418676 | -75.6208065067033 | 7.137292628352109 | 191.80616444223625 | 311.98307437968157 | -117.04508103552321 | 202.27898746063119 | -157.42841400895884 | 11.565568415553402 | -257.1695170431812 | -65.41686609928874 | -92.56326026882023 | -156.6126667614563 | 144.00390920471705 | 90.92887052290435 | -46.89370173044498 | -141.51065883801218 | 34.54637920013272 | 69.80304416058198 | -6.583527272331717 | -22.04934849898763 | 51.88978078357274 | 53.55369119515012 | 564.8974670664126 | -22.9599336820545 | 247.94298249499673 | 80.57316947543049 | -134.3967841018082 | 157.40503009786002 | null | null |
| -2432.259947267305 | 49270.55724873466 | 0.5423048647031151 | 0.5349424764677765 | 2275 | 276.6761124550577 | 96.36471878566107 | -115.3905106904924 | 77.09118302911698 | 35.83891675762998 | 16.312517684106698 | -50.36465339390301 | -117.16305087328762 | 111.78901090671006 | -70.12925249567755 | 8.025009284699125 | -29.416021664349135 | 11.292616409690051 | 49.36695292728671 | 188.15989024101114 | 79.67033044874091 | 22.560417976327816 | 5.311927971086263 | 56.67529311628662 | -3.4919146507766095 | 939.5122818216962 | null | -9.831934058102606 | 31.759695233419453 | -172.20755589283388 | -18.671965597779625 | -132.9970618058274 | 51.53356294155423 | -81.74152620331348 | -11.233983185647356 | 31.143499740408778 | 132.95987225061214 | -45.88474628275896 | 238.85011351894985 | 406.74908165487767 | -253.29246868654428 | null | -85.85698032225 | null |
| 25527.42508912528 | 69094.35079154029 | 0.5834053022717736 | 0.5747003384386464 | 1711 | 170.9589128019065 | 276.25276311801804 | -29.94678407531908 | 34.73327698061576 | -27.948216296020018 | -38.16146107689781 | -36.07678072582445 | -49.321353408116906 | -82.0037065951564 | 18.664772233254286 | 55.36281799878874 | 43.24878607885829 | 78.96023452678224 | 35.22015986959481 | 21.044636159473193 | 98.52219205158589 | 121.71420732376222 | -54.33909775712659 | 99.04823183905964 | 44.89897324770768 | null | 5.837430426395422 | 48.80171007357798 | 13.175859431233711 | -42.88865275733101 | -485.44159943513904 | -132.29773171008281 | -57.27322231078925 | -39.29815035940929 | 0.9248955672491832 | -63.28265400121269 | 102.45295827011324 | -83.56320249186558 | 311.8197996386219 | null | null | 138.59710057919398 | 447.6277669359927 | null |
| 167884.62003144095 | 400499.57737480884 | 0.5969858702453714 | 0.5794222391488799 | 887 | -688.6219055278524 | 1864.4198719279639 | -68.23337572386403 | 187.16875674327116 | -83.77990298070995 | 96.74602203598529 | -194.69709899075644 | 73.83417577897073 | -184.86846069142234 | -70.59669556461316 | 39.267113356693336 | -274.24614257925475 | -72.09830659602031 | 184.81369158985706 | -116.70755355831218 | 373.9630830426849 | 433.26523303488904 | 231.34221043915005 | 204.1902346481674 | 135.23484490125733 | -270.4495200328214 | 153.53371972182623 | -80.83912525511265 | 56.98840585543455 | 73.25883379788755 | 143.39321292349723 | -132.06658395210087 | 121.31170490320582 | 131.55491037447345 | -140.72489989012698 | -62.66854584635599 | 108.90042180082206 | -204.34146278473807 | 713.4132993153821 | 413.14657610857745 | null | 73.34159383213772 | 319.75381978573665 | null |
| 4937.383071239795 | 100476.01338400075 | 0.45037224057762704 | 0.44613022407812186 | 4832 | 170.6437645639239 | 122.54988036247963 | -18.17427585126113 | 56.40615756534723 | -50.53067568661713 | 47.86330839377154 | 25.261975627810422 | -179.10255591300768 | -50.02613442412147 | 14.377382799144657 | 32.659312572889334 | -0.3992527442575579 | 42.486174201197635 | 70.336088732856 | 71.09454076576341 | 150.4624600514075 | 290.8254006327677 | 59.0412329235891 | 21.41788237658332 | 43.6652584044308 | 786.460174622933 | -110.26215103794065 | -43.976733969583094 | 6.111898261643549 | -29.071836125788128 | 92.09806108292577 | -0.4471237215202691 | -17.39084473403107 | 126.33193222060082 | 61.34325083535418 | 191.49873036766218 | 178.54277904414465 | -66.26158769238145 | 159.95275668717852 | 79.9332331435385 | -89.14033462104992 | 340.8773104136138 | null | null |
| 31020.131102333726 | 42440.428692350586 | 0.709487851300992 | 0.6866466012804312 | 440 | -407.4555791392279 | 121.48808140808572 | 20.505076280285117 | 43.065525321979464 | null | -184.26722874463692 | null | -133.14313211752045 | 74.59053746407422 | 202.13521091711073 | 59.70778790535902 | 56.83803404390886 | 40.879579195664384 | -4.55171899926634 | -238.70851140852602 | -42.11554120768123 | 90.11256619041974 | 145.01258087822714 | 22.66585216605854 | 95.11558597297777 | null | -125.55026526616443 | 52.70256933349311 | 32.46860248745267 | -33.512875278401445 | 476.6792348976092 | -79.18337942584824 | -74.14179082164429 | -41.26696919367294 | 7.71629927127575 | 143.9445875102196 | 181.36426988211096 | -69.65428527501658 | 274.91322221891005 | null | null | -131.58666470357792 | null | null |
| 23784.4512406758 | 79471.32127393503 | 0.45742287098522205 | 0.4268551454069247 | 676 | -374.8550728986198 | 121.40864380007741 | -68.1068678054876 | 104.38323478979713 | 69.39080964886361 | 62.05274477649988 | -11.466977469013521 | -66.60701925534991 | 129.32341494795483 | -39.32678866353477 | 119.94082848795465 | 106.96909783554102 | 43.092372943600054 | 41.88529899912378 | 59.05882460337096 | 159.63364757063425 | 384.25336043952893 | -16.410299596553624 | 40.31126699696183 | -30.429423791298905 | -66.0320486628683 | -112.62270399129343 | 4.9002279906796105 | 212.74618285789907 | -416.66606879794233 | 773.3933515881959 | -214.14077640614667 | -45.08991189057026 | null | -372.0160055186541 | -41.75808337805402 | -38.692674977894505 | 58.11990111768103 | 278.4502508351791 | -13.026154595912175 | 115.18705521926314 | null | -119.84410677488569 | null |
| 60357.76710467688 | 36362.942338280416 | 0.7309045168313173 | 0.7097989887396559 | 441 | -274.66027902830257 | 521.6891344254975 | -54.26164699654278 | 11.034705582414645 | null | 29.746995286258873 | -53.17608545372655 | -67.05957263955672 | 132.0728907007528 | -60.87638871729156 | 131.50090339159988 | 203.1382035690564 | 106.13602494627727 | 424.0513682171317 | 31.63802009215771 | null | -5.487459085596656 | 54.72666960594912 | 49.1938206099524 | 111.70449381918253 | null | -232.05808392003297 | -35.29785012450751 | -136.53306888888153 | 37.1369692355582 | null | -36.00742867113516 | -56.229643263592855 | -72.14878955553604 | -37.062402879580226 | 95.29149756983702 | 60.82970551820889 | -27.68271656075728 | 272.162663366017 | null | 94.87003653214806 | null | 144.251665498647 | null |
| -1218.574666205719 | 19718.50654907504 | 0.7713002169527077 | 0.7471517926557887 | 357 | -72.257713081173 | -49.85929842719285 | -211.2636942455311 | -27.57997808055253 | null | -75.50173620084098 | null | -161.82012408857452 | 47.3086310459518 | 3.3962155144287425 | 12.811888479920523 | -251.79232353145417 | -35.28640029118177 | 53.4587481747453 | 13.560051694389374 | 642.4574680016814 | -184.83947729446714 | null | 162.10419270780184 | -39.632127925263454 | null | -66.342839447075 | -15.075180883330411 | 19.491971695840036 | -75.46542976636839 | 91.39181298779314 | -28.49977587286129 | 42.24539150919898 | 16.88502312822295 | -12.116559994205282 | 72.19954098510723 | 162.49358466930883 | -42.516484709097085 | 219.19559923220302 | -431.836434725115 | -328.1383711936839 | -234.51852042599768 | -484.60753138631475 | null |
| -1378.1015106578755 | 122978.580937301 | 0.46780440590416295 | 0.45964392642569385 | 2451 | -424.92037212084466 | -166.09303023119978 | -39.34469299041944 | 85.82114306500866 | 137.1879354187151 | 10.96311999510799 | -103.06958727206026 | -134.331147317789 | 20.037546527129546 | 17.43853041092788 | -5.69542140885373 | 59.1980499534517 | 22.67177067017284 | -25.075048100877694 | 72.06442613651546 | 368.05601958138544 | 77.22414568272278 | 79.80001453956531 | 114.95642079283685 | 16.049082514063464 | null | -175.35547272971243 | 26.39569787591767 | -54.690564208864494 | 1.5984078914082187 | 45.11938310732777 | 56.70836201347351 | -40.505241455102336 | 107.80221339374982 | 36.81130668296495 | -32.30285294835767 | 69.55812502071106 | -68.13543982064049 | 345.3918749198147 | -79.28532410007654 | -118.15562414778408 | null | -150.26170456575025 | 376.51090976345085 |
| -687.2864176851774 | 27201.90107182771 | 0.8032081174102619 | 0.7528293954672889 | 158 | -157.1609634927663 | -64.57097156693496 | 53.18285274437257 | 94.88211000107712 | null | -390.8308075788937 | -13.571367790820226 | -22.536596270165354 | 128.97695201900763 | 102.05976835275867 | 24.183318504092234 | null | 16.355487710718418 | 127.86909877587019 | 43.254317525716544 | 87.86105368654177 | -109.58681606903335 | -63.92797649347925 | -170.52296683122148 | -40.837336079702474 | 342.8793868253843 | null | 65.36478574269599 | 209.12058248849124 | -122.27793574233841 | null | -19.931488728605075 | -223.2055287365516 | 70.92213070942245 | -34.47384307137745 | 237.74220045599165 | 77.78391504120353 | -123.50343753584625 | 371.0565569736201 | null | -151.67895990788674 | null | 9.405955807434907 | null |
| 6938.6661232880315 | 116215.7657682982 | 0.5739399899456359 | 0.5641424633005075 | 1647 | 433.09647125593057 | 316.3815557253819 | -32.463561361264865 | 126.51052626120192 | -59.37678768543518 | -149.40838812609778 | -80.91252745408286 | -159.28031263162242 | -118.91828803480813 | -102.46886939517145 | 106.44041803753595 | 178.7174133754075 | 93.91654193648633 | -12.751590569550217 | -118.41420280450367 | 653.6836331076154 | 440.1067770401045 | 249.35439935339642 | 237.513309032413 | 73.06687039846894 | -114.55340526512461 | 126.70745597710079 | -29.949335880369663 | 54.16318289781627 | 11.610120114083086 | 169.15082423494735 | -138.24339328208066 | 81.17005363532591 | -47.12990461009268 | -13.455829605348958 | -78.06494054824304 | 282.4769327720188 | 19.716827585889764 | 173.76732327955258 | 169.34473757883217 | 223.79420517240726 | 201.26715497237134 | null | null |
| -15502.325619126812 | 73901.55230078677 | 0.5231014225439463 | 0.49692153030971187 | 712 | -133.13596994274 | -221.03936406915165 | -51.492526429245075 | 99.14055209001363 | -1034.8334907260462 | 250.28890914675108 | -2.7377650507416154 | -91.41008628592331 | -131.69652215654483 | -2.6122259300146586 | -56.00075748154285 | -271.8967841405707 | 62.096651929003365 | 102.49680698807128 | 46.32264956525707 | -6.015310276357605 | 590.2594063808557 | 50.77336767516402 | -91.33018045056363 | 75.58735122621697 | null | 4.071085238559591 | -137.71920828588088 | -16.29400819539662 | -58.187780005465214 | -114.84589448120143 | -96.7137435912915 | -13.699948287770393 | -46.12521419709434 | -38.39750199423236 | 102.56655240918691 | 202.23119475753825 | -95.85116982935602 | 279.24123990593216 | 3.7004495062206666 | -92.07231422603667 | null | -369.4427400540135 | -304.83511629067397 |
| 24542.28605723402 | 56397.75208720264 | 0.5244611498496979 | 0.5173928597566299 | 2459 | 4.860278270571332 | 318.29666047084913 | -187.2242955279761 | 26.155215757094574 | -5.472976044009917 | 97.07690900153023 | -37.33216019527834 | -19.029714765191045 | -109.87715332391849 | -7.462968691484177 | -63.23536060506925 | 3.389379848847898 | -23.303153650402393 | -3.7033023569936385 | 56.69241197676432 | -115.44726308354713 | 92.05751760190728 | 28.94525050253587 | 168.8588866206288 | 76.90112267797271 | null | 12.303122094379903 | -77.0118420016598 | -85.17949749847644 | 3.183148936088005 | 68.67569501925516 | -52.22039522238442 | 3.8249754422873834 | 31.791797993526778 | 9.751608656537579 | 107.64133410796138 | 200.08908263234014 | -88.0168333214913 | 215.7126624858332 | -149.93201866346394 | -395.50781920954523 | -29.495660316043043 | null | null |
| 1605.3920724162392 | 56407.32800747606 | 0.5219145548728584 | 0.5109349022616707 | 1560 | -130.3244265800866 | -27.23076832926678 | -231.41582747446674 | 33.62475262122104 | 68.65401911888837 | 103.3788133818211 | -7.973917437881832 | -74.35954041069232 | -3.3346304401337963 | -23.82323139777007 | 86.69141339391138 | -54.36871068827173 | 58.34207185882147 | 81.2629324962064 | 54.63957745275495 | 114.80749409786476 | 319.24648549856903 | -4.1179674136955695 | 66.46879299649459 | 27.079133628478424 | null | -20.737443432206224 | -48.10284215024819 | 0.4727388274655018 | -12.87177375430519 | null | -33.13190479935768 | 12.685999793389213 | -56.697906753193834 | null | 70.13708055660098 | 67.70451540287294 | -47.710807911575756 | 276.0002047793718 | null | -25.422334338912883 | -55.969687032532086 | -28.819447499631895 | 227.30320594740968 |
| -10373.190127636015 | 55803.103310232 | 0.6350797978613998 | 0.6295390440541498 | 2408 | -70.64114247273318 | -116.10922041878504 | -128.6735233977552 | 86.37822457119202 | -12.250470123895829 | -6.149698450600517 | 26.457384545212516 | -78.44746665585842 | -114.19061968631638 | 18.59617941295564 | 42.34643731460203 | -48.64985399162762 | -17.327450364367284 | 1.8785315766122623 | 24.724105136680485 | 183.77588583653144 | 279.35461923643777 | 72.85200132778102 | -5.865034312084763 | -63.855095114385314 | null | -39.06335244599379 | -19.665433505845552 | 62.76846713323136 | -16.750684228155798 | 100.74220215092244 | -94.76898700363478 | 40.260651774319406 | -75.73573441330893 | -55.893227432924306 | 41.68564633600453 | 73.16769508983752 | -68.97732904460754 | 277.8246480060338 | 63.64873243026331 | 68.84088325646371 | null | 74.24552987159284 | null |
| 4288.826984521728 | 16377.975012302579 | 0.6790959528150704 | 0.6620783139492029 | 696 | -27.84827292423643 | 33.78820398448696 | 36.39359987796304 | 35.76378513147984 | null | 9.700567211347646 | -69.56759716725806 | -10.45860383288184 | -76.77966045776316 | -47.438668900023046 | -8.823850512396776 | -25.455922340734116 | 14.291899479913427 | -36.41841237761906 | 113.32179656126394 | 14.033142011078478 | 174.128389224558 | -10.461580521589164 | 42.03665731277822 | -35.595036672693645 | null | 23.128114436136507 | 0.12099437001570655 | 18.143211155784705 | -55.13210333835353 | 195.31093568666887 | -40.03307007322332 | 57.66550685745028 | -12.816848511920309 | 4.052015593111226 | 47.75900039410433 | 28.741972261839113 | -53.265773909818705 | 262.5440112581235 | -371.8885717210771 | 46.497213583388266 | null | -43.72098949432198 | null |
| -2837.7957363372025 | 144617.80667205554 | 0.4756820999863709 | 0.4733539765696242 | 8597 | -328.58498714006083 | -211.94428046777253 | -93.82547587131724 | 9.044461001979593 | -40.360438935848855 | 101.12000038740449 | -69.5173460087785 | -161.1797053747436 | 73.15402590693054 | -32.55627020319389 | 59.67940940061885 | 89.95995732449444 | -0.3222003674096389 | 167.16959010875433 | 39.130468707915476 | 292.10748963502897 | 111.29010178436499 | 82.69140543222544 | 82.7227428827094 | 92.51881497947652 | 251.43468737052171 | -71.60449498954856 | -19.199409016678953 | 18.872340271066488 | 86.79986869653806 | 187.9067144806307 | -77.49264895119924 | -6.87208022687135 | 73.85098302473953 | 35.86056235553572 | 113.85445806916508 | 254.08420032783633 | -48.44161409282177 | 241.23985895351896 | -177.41893328178642 | 101.18683574951795 | 115.66595989948824 | null | 510.427966359338 |
| -172.67882695124513 | 38088.04746669037 | 0.4734549597793821 | 0.4610087211398928 | 1560 | -18.79964242023619 | -14.591082680506041 | -53.66991752445174 | 49.68304003459362 | -78.8490006987192 | 140.92630232228785 | -3.793474692952554 | -80.19833514764339 | -77.0659366690544 | -44.21726757997457 | 29.42279306798854 | 52.38680410604225 | 27.841768649396144 | -26.241876012774927 | -19.53588119195603 | 223.07902645261925 | 241.28504837779684 | 106.08683128225182 | 52.49071164431117 | 52.16207128914166 | null | 23.22949464167863 | -17.084281934654303 | -37.56984045329461 | -57.08342848539979 | 171.89428213322296 | 44.18459085656508 | -50.16424494354116 | 15.134044546735549 | -20.63568003414861 | 56.97241988541662 | 82.44824688876417 | -46.349264001783474 | 199.71886794297023 | -141.72493190386962 | 20.11280012432865 | -8.398322327764385 | null | null |
| 531.8538397098578 | 42665.18494807305 | 0.7460699467049043 | 0.7234983864120069 | 442 | -119.72583188648206 | -45.42629808673888 | -79.81769573295075 | 78.53107382838553 | 98.62727338665321 | 48.175925758747525 | 39.48117713255342 | -272.3412270695344 | -68.90812987544048 | null | -25.559077298202713 | -115.76749827848703 | -12.897698370064854 | -13.210498600445094 | 86.06067986524096 | 215.02466796487198 | 181.02540948075116 | -339.1995663520371 | 170.70064239110496 | 142.95218961055306 | 814.8337391965733 | 143.47766112175503 | -77.63515288083543 | -45.545854148834735 | -238.71730046075288 | 449.0585381698006 | -114.41320014742114 | 138.7510776021374 | -145.2156979410017 | -75.7073678891337 | 59.75743477659415 | 113.9266877299749 | -145.94510484142248 | 278.7523339157194 | -82.07873871998311 | null | -28.04415741009968 | -290.68124748603356 | null |
| -15134.758273455582 | 181533.82197684297 | 0.48531669751266215 | 0.4728058272293778 | 1518 | 382.9270815633304 | 5.484436794120029 | -22.871708727122197 | 48.41557395252503 | null | -112.35113832129444 | 67.42247789367626 | -93.88944408862832 | -181.18910210693863 | 8.29204944133709 | 61.93730158597654 | 104.45877487343377 | -202.3278249418336 | -225.83820660749578 | -168.05314559589883 | 271.95393379164506 | -314.3539363490456 | 22.262385741723296 | 270.1114259823257 | 223.11961326295844 | 94.23780066806901 | 84.87180909776973 | -17.17418460851824 | -279.6866046445716 | 335.85312223437825 | 174.81851717878155 | -170.7488838637494 | 17.80890234086523 | 170.17114700397437 | 83.24691848373705 | 94.07382995535357 | 499.76568464441146 | -147.05691004381092 | 133.92794931878126 | null | -52.23845907201691 | 91.99580463184512 | null | 126.95983685398055 |
| 4876.42767092716 | 61121.11366852651 | 0.518020497193971 | 0.5092435551553597 | 1958 | 46.638128837821185 | 72.16375986546825 | -47.914561703586934 | 122.35870554028686 | 203.93990893622203 | 99.2352695282971 | 26.7596349292657 | -68.01778655709771 | -65.20407818204917 | -28.995376128439442 | 20.067400099214098 | 36.698861384459086 | -10.653561076879248 | 81.93341156293299 | 0.5290089003920657 | 43.74401539541504 | 207.3769689598104 | -23.99852153563013 | 29.52127453895433 | 25.752946938640573 | null | null | -22.5082260235883 | -71.17882079765577 | 6.793034892766783 | -0.2742765824062451 | -84.37499919476795 | -19.51596955606327 | 125.5665423782322 | 7.4236201801644475 | 10.012207385808003 | 90.04746451318489 | -35.1029015676457 | 186.7072139116046 | -93.40607115737626 | -140.20687809295217 | null | -176.34083216244272 | null |
| -61367.340321044416 | 226826.97421245003 | 0.6403385352002524 | 0.6303402242096803 | 1332 | -339.46633960336595 | -623.5007237413307 | -85.38941233412557 | 147.71915728155594 | -50.41567923198512 | 10.0596763249636 | 24.129917693771457 | -162.91439374005094 | -253.4216002281301 | -81.06448708802644 | 181.22336847895716 | -62.19866739403955 | -45.13435129362308 | 47.35188471951733 | -36.67493535515597 | 412.42690784982676 | 107.78952156013203 | 48.81116606131315 | 12.324873547711164 | -36.00688471450863 | null | 27.780426076835976 | 80.24397120744895 | 7.0022297452094735 | -52.5637447449025 | 278.17869454064487 | -64.54349668301008 | -30.627746248458074 | 204.49675768930177 | 110.37515642935973 | 149.71538415894096 | 161.9966476944709 | -38.67742278379611 | 508.0767292001992 | null | -173.3279020676866 | 215.0450925848481 | 179.1421196737831 | null |
| -13653.899972260646 | 69144.51026126942 | 0.5862816333736157 | 0.5745852336709312 | 1274 | -20.258361623408565 | -180.1088412412601 | -66.1570714803938 | 74.38288298363022 | 204.3639116803014 | 65.70848267741255 | -53.23517297805994 | -74.7742033192842 | -38.711317767156324 | 23.414526274008736 | 73.37721235895035 | 25.87337336267419 | -22.63837655599944 | -3.895334735803803 | 17.79137425166331 | 80.42428189858175 | 96.17894182574993 | -136.03851058446472 | 35.35342605823043 | -0.37598515257656095 | null | -27.710982775594015 | -23.266366359074535 | 81.20326375211562 | 129.66483719439648 | null | -49.768314885558226 | 91.65227078238402 | 20.48748121725223 | 0.377905334757924 | -196.22674788677847 | 106.21917665328151 | -11.526678268032422 | 246.20941878975455 | 120.79406634189404 | 113.01840466849798 | null | -294.25476230760205 | null |
| -17673.73363666568 | 139154.96367493764 | 0.7423420510247722 | 0.7364527264767671 | 1612 | -911.3735703593856 | -415.0748500608853 | 33.48803408530418 | 100.30703896879373 | 176.7160164939457 | 185.90783581026926 | -57.25558802318491 | -70.3253665501666 | 38.4866469027606 | -66.24630780763924 | 76.21406649005478 | -72.38083385267055 | 41.93970646025304 | 4.064755505688226 | 138.7081304427151 | 117.52475847401273 | 84.94161973456893 | -180.01283670263646 | 37.09333201882134 | 100.49398627449756 | null | -44.476600734910775 | 55.55608182796853 | 48.51250037855895 | -132.3343068504236 | 29.154010922734056 | -52.44411739272679 | -57.92427579192688 | -26.7214086273621 | -36.04759357496705 | 188.7215650686648 | 83.64460771462886 | 23.598758360220597 | 510.57634146359385 | 67.4041443134332 | 56.973073856393306 | 168.268367020253 | null | null |
| 4477.672591450521 | 49859.67990702523 | 0.5711007299589995 | 0.4655255250258301 | 163 | -100.34476644676037 | 12.615448024490064 | 82.02000370256117 | -27.10108762018896 | null | 47.118351350544984 | -14.3913879527033 | -243.92253384649976 | -27.102533921673807 | 6.206293678769761 | 138.22343472421124 | -26.08214729584143 | 66.0418551727642 | -186.54030739835332 | -8.04341905507215 | 9.362647461080947 | -34.01172397816503 | -63.15291156191261 | 51.37273742856952 | 24.476048442435616 | null | null | 1.4004833192599113 | 10.653188780709298 | -70.70542912463885 | 8.340498701916495 | -46.35643934014688 | 53.30631548279428 | -183.42745721838094 | null | 50.28759399183382 | 130.24453943821578 | null | 153.41914708451742 | null | -74.82407832628077 | 14.876479275033747 | -51.86152475249419 | null |
| -17796.755570972648 | 75324.15948242611 | 0.39623623909605477 | 0.3894204675712689 | 3226 | 218.258813300685 | -129.8974248227205 | -78.74151814085472 | 38.514786844555296 | 30.456135777950823 | 25.608229683216287 | -40.89740797396682 | -55.17130747420794 | 29.632254017572805 | -36.46423533617386 | 165.40351008523902 | 79.27664044754533 | 99.27304704066027 | 42.659168318315025 | -34.497868138519 | null | 210.77244498781243 | -75.29490821250077 | 1.5768681602608607 | -9.369575530280603 | 136.95522135443386 | -14.541033959779405 | -61.855680889584114 | -5.095606284061452 | 116.63613449061317 | 176.44140731138828 | -66.27714026036661 | 1.2229874578879685 | 62.22965169767035 | 36.8362668359997 | 263.67644101081646 | 138.48381483302032 | -26.681383386251042 | 170.5437465837581 | 39.99834206550906 | -5.65141335105205 | -63.02614199373995 | null | null |
| 4225.568148349634 | 68501.1109531393 | 0.6892352596100243 | 0.5769106546497922 | 114 | -115.78140930887115 | -16.119601810277786 | -177.4061738214088 | -45.56333760095703 | null | 272.9344681915783 | 202.88703144637648 | -280.35002979660436 | -18.297040742115847 | -213.6956277964598 | 98.58971228718464 | 434.85532488807286 | -119.64700438364623 | -18.674486463537054 | 336.12727015860344 | -305.35772408556795 | 218.06094740197162 | null | 518.3683221652402 | 87.7093964834514 | null | 56.05538975068374 | -22.001299658290584 | -134.11280526954633 | -6.170532440938332 | null | -110.05572171186164 | 52.60412021298848 | null | null | -29.71047161373933 | 265.25448384685797 | -82.10602912866803 | 192.02420666466563 | null | null | -41.66416780486817 | -51.36439257210554 | null |
| 14489.843942944506 | 44403.39239160985 | 0.7240033256743393 | 0.6831149294779452 | 249 | -421.6078808114914 | null | 8.821250408494862 | 4.676629128972333 | 175.87424773222287 | -62.13738443278895 | -72.64493154701309 | -49.129911144411125 | 327.9970432383937 | -118.81105726558557 | 377.4559680804381 | 273.24855572475644 | 5.222777862381211 | 177.46532146265898 | -12.459440088507636 | 85.72211284978218 | 157.97962067868832 | null | 145.6432374398857 | -0.8957433865230466 | null | -251.11271627421877 | -72.85812457168069 | 168.98475913382154 | 6.634720420981287 | null | -105.25149570514598 | -57.35236927355806 | 259.14623618417204 | -31.125307986679974 | 354.6434709699127 | 368.09318382035616 | 23.55273534597773 | null | null | -252.35302702408933 | 22.233816922944147 | 706.9089427375208 | null |
| -9060.405647152316 | 117856.24199536812 | 0.5790327827483543 | 0.5592592917031596 | 848 | 62.160825993898634 | -81.59766532005976 | 6.7243945562253815 | 86.57696067207443 | -262.0510740035791 | 104.80203467849974 | 120.66392308301263 | -128.63922137892158 | -253.80905202785615 | 22.536840298963252 | 96.49041635330444 | -93.90876514870905 | -24.411214820621296 | 50.37736038321856 | -0.8702088565205505 | -72.29791420124165 | 230.3228713608891 | 91.35067051428031 | 22.740848518042224 | 29.779316900826284 | -90.572396247134 | 12.68706215268591 | -13.131889311912904 | -52.15459357918303 | 60.527475305898975 | 56.1296925744804 | -1.0285750805309892 | -109.22205211706536 | 300.4638985771046 | 51.987184279614965 | -232.2923732844803 | 297.03622687545806 | 48.756397355362594 | 156.38904024444727 | 82.90727467388628 | -250.9409790689037 | null | 432.43813910982954 | 247.58773320878964 |
| -26254.838745430534 | 41348.97963689274 | 0.6466615990846267 | 0.6335307801316905 | 922 | 490.3022145050106 | -124.44344358206787 | -106.1398670678517 | 44.71013240715845 | null | 93.19115253289321 | -67.77514734881598 | -83.30583206022082 | -60.584823237824814 | 69.50758109001936 | -14.065256347021906 | 21.739782459551247 | 57.6655460530596 | -2.731415878147764 | -10.828680552692335 | null | 268.6634651080263 | -25.098207424676627 | 36.86411458889938 | -5.082540101389318 | null | -52.62245084337594 | 36.96180352822588 | -32.942442144815104 | -139.07203820152418 | -202.73001407680184 | -99.92028229130256 | -35.18798955518171 | -726.9695265139991 | null | -43.22507622541276 | 139.4809345823902 | -20.8397168651989 | 197.90169760469604 | 53.77057795986393 | null | 71.51871055536493 | 65.49092452557295 | null |
# Optional: Save to CSV
# Persist the full results table (presumably one row per cluster, going by the
# output file name -- confirm against where `combined_results` is built).
combined_results.to_csv('combined_results_by_cluster.csv')
# Compute the summary statistics once and reuse them for both the CSV export
# and the notebook output, instead of running describe() twice.
combined_described = combined_results.describe()
combined_described.to_csv('combined_described.csv')
# Bare expression on the last line so the notebook renders the summary table.
combined_described
| intercept | mse | r2 | adjusted_r2 | row_count | latitude | longitude | has_Tennis | has_Parking | has_Alarm | has_TV | has_Clubhouse | has_Playground | has_Refrigerator | has_Cable_or_Satellite | has_Unknown | has_Gated | has_Pool | has_Wood_Floors | has_Internet_Access | has_View | has_Elevator | has_Hot_Tub | has_Gym | has_Storage | has_Doorman | has_Dishwasher | has_Washer_Dryer | has_Patio/Deck | has_Garbage_Disposal | has_Luxury | has_AC | has_Fireplace | has_photo_no | has_photo_yes | pets_allowed_Yes | bathrooms | bedrooms | scaled_square_feet | week_1 | week_2 | week_4 | week_3 | has_Golf | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 69.000000 | 6.900000e+01 | 69.000000 | 69.000000 | 69.000000 | 69.000000 | 68.000000 | 65.000000 | 69.000000 | 41.000000 | 67.000000 | 64.000000 | 66.000000 | 68.000000 | 66.000000 | 68.000000 | 64.000000 | 66.000000 | 68.000000 | 69.000000 | 57.000000 | 64.000000 | 62.000000 | 65.000000 | 68.000000 | 21.000000 | 63.000000 | 68.000000 | 68.000000 | 66.000000 | 46.000000 | 68.000000 | 69.000000 | 62.000000 | 58.000000 | 69.000000 | 69.000000 | 68.000000 | 63.000000 | 42.000000 | 49.000000 | 45.000000 | 46.000000 | 17.000000 |
| mean | -5635.192481 | 1.933318e+05 | 0.593290 | 0.565340 | 1442.275362 | -103.502490 | -76.985637 | -54.019427 | 41.746411 | 94.786286 | 43.185032 | -8.151864 | -100.627669 | -33.912791 | -20.366491 | 74.743513 | 10.056554 | 31.316436 | 22.051760 | 48.704407 | 110.583373 | 161.483534 | -6.254724 | 66.239768 | 18.708088 | 400.255311 | -16.044055 | -3.314672 | -6.296264 | -52.751475 | 125.262650 | -53.041872 | 74.196465 | -14.449171 | -17.594195 | 60.712411 | 175.752144 | -33.126419 | 286.244631 | -13.416755 | -108.186387 | 13.363597 | 13.209264 | 260.479285 |
| std | 51299.270208 | 4.184157e+05 | 0.138231 | 0.133029 | 1750.669921 | 439.870653 | 658.589008 | 149.864074 | 77.930061 | 1236.716968 | 161.483473 | 105.093455 | 128.948385 | 117.935721 | 141.007635 | 97.659861 | 160.345353 | 125.869482 | 187.254806 | 143.561311 | 217.606344 | 226.482178 | 177.993273 | 135.767728 | 111.562116 | 582.868323 | 111.820267 | 79.384736 | 102.220587 | 185.351045 | 246.521291 | 130.539638 | 707.942603 | 205.019233 | 96.288544 | 132.087456 | 156.405096 | 102.850047 | 165.031000 | 286.111415 | 263.170597 | 160.441376 | 274.773274 | 572.135298 |
| min | -263846.438936 | 1.270063e+04 | 0.109622 | 0.082412 | 58.000000 | -2590.291689 | -4059.718771 | -701.967604 | -169.357164 | -1454.094111 | -390.830808 | -248.540290 | -656.544585 | -437.039294 | -635.922424 | -143.007254 | -333.804514 | -270.862437 | -636.120235 | -238.708511 | -305.357724 | -563.462089 | -592.500676 | -170.522967 | -558.773836 | -270.449520 | -370.746099 | -197.107983 | -279.686605 | -802.984673 | -485.441599 | -277.205765 | -434.857760 | -726.969527 | -372.016006 | -277.463632 | -170.473769 | -473.745244 | -76.829368 | -618.847485 | -1439.437462 | -371.165253 | -484.607531 | -1003.836785 |
| 25% | -14855.705798 | 4.266518e+04 | 0.499243 | 0.473379 | 391.000000 | -157.160963 | -132.030763 | -93.825476 | 4.676629 | -84.731674 | -41.400812 | -61.987263 | -138.575554 | -91.608651 | -70.545170 | 19.824065 | -74.070797 | -24.134200 | -32.726767 | -12.459440 | -15.219875 | 54.597497 | -66.338824 | 1.110050 | -24.960274 | -77.567813 | -64.105651 | -42.845481 | -54.370399 | -117.839281 | 13.543877 | -116.894043 | -56.229643 | -81.400220 | -43.935488 | -1.249423 | 89.265818 | -72.188392 | 200.931727 | -170.547205 | -176.894240 | -63.026142 | -142.657305 | 30.179795 |
| 50% | -172.678827 | 6.909435e+04 | 0.583405 | 0.559259 | 808.000000 | -40.682915 | -20.024286 | -53.669918 | 49.683040 | -12.250470 | 28.918461 | -21.598689 | -89.314838 | -37.180299 | -32.124089 | 58.212190 | 10.988631 | 20.004812 | 20.631098 | 29.185230 | 87.861054 | 146.087680 | 1.574906 | 40.473537 | 15.484724 | 251.434687 | -15.565721 | -13.511759 | -5.635583 | -29.024385 | 96.420132 | -78.338014 | -13.699948 | -14.927983 | -0.110695 | 58.473299 | 139.480935 | -30.663643 | 257.346292 | -45.111300 | -52.238459 | -8.398322 | 0.127003 | 214.278831 |
| 75% | 8025.502776 | 1.391550e+05 | 0.693826 | 0.671222 | 1612.000000 | 84.114462 | 99.908694 | -7.845285 | 86.576961 | 131.467717 | 112.371430 | 35.120239 | -52.379762 | 29.960110 | 20.520557 | 123.667236 | 96.630459 | 68.733904 | 87.074260 | 89.039076 | 216.805173 | 269.638633 | 85.075218 | 114.956421 | 74.081189 | 814.833739 | 36.208420 | 38.872538 | 31.936922 | 47.201458 | 193.459880 | -28.050793 | 48.383510 | 68.944620 | 36.573621 | 115.876091 | 219.582368 | 23.564241 | 307.662442 | 109.977813 | 47.654158 | 115.665960 | 124.145645 | 376.510910 |
| max | 167884.620031 | 2.251679e+06 | 0.885510 | 0.825288 | 8597.000000 | 490.302215 | 1864.419872 | 686.097725 | 273.547572 | 7376.139515 | 559.401825 | 371.102219 | 266.413443 | 327.997043 | 580.316440 | 377.455968 | 434.855325 | 509.007485 | 828.207923 | 895.038092 | 841.417222 | 788.886661 | 483.665764 | 692.402872 | 227.875079 | 1778.738531 | 224.928337 | 235.076917 | 220.083131 | 335.853122 | 773.393352 | 530.067121 | 5795.457881 | 942.315732 | 132.548000 | 354.643471 | 776.553136 | 274.249817 | 986.087635 | 787.484329 | 223.794205 | 355.326303 | 778.523095 | 1559.394386 |
from pyspark.sql import functions as F
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
def plot_clusters_on_map(df, n_bins=None):
    """Draw a static matplotlib scatter plot of cluster assignments by location.

    Points are plotted as longitude (x) against latitude (y) and coloured by
    their value in the 'clusters' column. The figure is shown with the
    notebook's ``display`` helper and then closed, and the cluster range is
    printed.

    Args:
        df: Object exposing ``to_spark()`` whose resulting Spark DataFrame has
            'latitude', 'longitude' and 'clusters' columns.
        n_bins: Number of discrete colours in the colormap. Defaults to the
            number of distinct cluster values so that evenly spaced ids (for
            example contiguous integers) each get their own colour. Values
            below 2 are raised to 2 because the colormap needs two anchors.

    Raises:
        ValueError: If the DataFrame contains no rows.
    """
    # Collect only the three columns that are plotted onto the driver.
    rows = df.to_spark().select('latitude', 'longitude', 'clusters').collect()
    if not rows:
        raise ValueError("Cannot plot clusters: the DataFrame has no rows")

    latitudes = [row['latitude'] for row in rows]
    longitudes = [row['longitude'] for row in rows]
    clusters = [row['clusters'] for row in rows]

    unique_clusters = sorted(set(clusters))
    min_cluster, max_cluster = unique_clusters[0], unique_clusters[-1]

    # A fixed bin count smaller than the number of clusters makes several
    # clusters share one colour, so size the colormap from the data by default.
    if n_bins is None:
        n_bins = len(unique_clusters)
    n_bins = max(int(n_bins), 2)
    colors = plt.cm.rainbow(np.linspace(0, 1, n_bins))
    cmap = LinearSegmentedColormap.from_list('custom_cmap', colors, N=n_bins)

    # Square canvas so the equal aspect ratio below fills the figure.
    fig = plt.figure(figsize=(12, 12))
    scatter = plt.scatter(longitudes, latitudes, c=clusters, cmap=cmap,
                          alpha=0.6, edgecolors='none')

    cbar = plt.colorbar(scatter)
    cbar.set_label('Cluster')

    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title('Geographical Distribution of Clusters')

    # One degree spans the same length on both axes.
    plt.gca().set_aspect('equal', adjustable='box')
    plt.grid(True, linestyle='--', alpha=0.7)

    # Centre a square window on the data, padded by 5 degrees in total.
    lon_min, lon_max = min(longitudes), max(longitudes)
    lat_min, lat_max = min(latitudes), max(latitudes)
    half_span = max(lon_max - lon_min + 5, lat_max - lat_min + 5) / 2
    x_center = (lon_max + lon_min) / 2
    y_center = (lat_max + lat_min) / 2
    plt.xlim(x_center - half_span, x_center + half_span)
    plt.ylim(y_center - half_span, y_center + half_span)

    plt.tight_layout()

    # `display` is not defined in this cell; presumably the notebook
    # environment's built-in -- TODO confirm.
    display(fig)
    plt.close(fig)
    print(f"Clusters range from {min_cluster} to {max_cluster}")
# Render the static cluster map. `clustered_df` is not defined in this cell
# (presumably created earlier in the notebook -- verify); it must expose
# to_spark() and carry the 'latitude', 'longitude' and 'clusters' columns that
# plot_clusters_on_map selects.
plot_clusters_on_map(clustered_df)
print("Cluster map has been displayed.")
Clusters range from 0 to 68 Cluster map has been displayed.
import pyspark.pandas as ps
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import colorsys
import numpy as np
def generate_colors(n):
    """Return ``n`` colour strings of the form ``'rgb(r, g, b)'``.

    Hues are spaced evenly as ``i / n`` for ``i`` in ``range(n)``, with
    saturation and value both fixed at 0.5. Each RGB channel is scaled to
    0-255 and rounded to an integer. ``n == 0`` yields an empty list.
    """
    palette = []
    for step in range(n):
        hue = step * 1.0 / n
        red, green, blue = (
            round(channel * 255)
            for channel in colorsys.hsv_to_rgb(hue, 0.5, 0.5)
        )
        palette.append(f'rgb({red}, {green}, {blue})')
    return palette
def calculate_zoom(x1, x2, y1, y2, max_zoom=18.0):
    """Estimate a map zoom level that fits a bounding box.

    The zoom is ``11.5 - ln(extent)``, where ``extent`` is the larger of the
    two coordinate spans multiplied by 111, clamped to ``[0, max_zoom]``.

    Args:
        x1, x2: The two bounds along the first axis (callers in this file
            pass the minimum and maximum longitude).
        y1, y2: The two bounds along the second axis (callers in this file
            pass the minimum and maximum latitude).
        max_zoom: Upper limit for the returned zoom. It is also the value
            returned when the box has zero extent.

    Returns:
        The zoom level as a float.
    """
    # 111 is presumably the approximate number of kilometres per degree --
    # TODO confirm.
    max_bound = max(abs(x1 - x2), abs(y1 - y2)) * 111
    if max_bound <= 0:
        # A single point (or identical coordinates) has zero extent; log(0)
        # is -inf and would produce an infinite zoom.
        return float(max_zoom)
    zoom = 11.5 - np.log(max_bound)
    # Keep the result inside a range a map renderer can use.
    return float(min(max(zoom, 0.0), max_zoom))
def plot_clusters_on_map(df):
    """Show an interactive plotly map of clustered points with a side legend.

    ``df`` is used as-is when it is a pandas-on-Spark DataFrame and is wrapped
    with ``ps.DataFrame`` otherwise. Its 'latitude', 'longitude' and 'clusters'
    columns are converted to numpy arrays. The left panel gets one
    Scattermapbox trace per distinct cluster value; the right panel holds
    empty scatter traces whose only purpose is to supply legend entries.
    The figure is shown and the cluster range is printed.
    """
    frame = df if isinstance(df, ps.DataFrame) else ps.DataFrame(df)

    # Pull the three plotted columns into numpy arrays.
    lat_values = frame['latitude'].to_numpy()
    lon_values = frame['longitude'].to_numpy()
    labels = frame['clusters'].to_numpy()

    # One colour per distinct cluster value.
    cluster_ids = np.unique(labels)
    palette = generate_colors(len(cluster_ids))

    # Bounding box of the data drives the map centre and zoom.
    west, east = np.min(lon_values), np.max(lon_values)
    south, north = np.min(lat_values), np.max(lat_values)
    map_view = dict(
        style="open-street-map",
        center=dict(lat=(south + north) / 2, lon=(west + east) / 2),
        zoom=calculate_zoom(west, east, south, north),
    )

    # Wide map panel on the left, narrow legend-only panel on the right.
    figure = make_subplots(
        rows=1,
        cols=2,
        column_widths=[0.8, 0.2],
        specs=[[{"type": "scattermapbox"}, {"type": "scatter"}]],
    )

    # Largest cluster value, used as the marker size reference.
    highest_label = np.max(labels)
    size_reference = 2. * highest_label / 10. ** 2

    # Map traces: one per cluster, hidden from the legend.
    for cluster_id, shade in zip(cluster_ids, palette):
        members = labels == cluster_id
        member_lats = lat_values[members]
        member_lons = lon_values[members]
        if len(member_lats) == 0:
            continue
        map_trace = go.Scattermapbox(
            lat=member_lats,
            lon=member_lons,
            mode='markers',
            marker=dict(
                size=8,
                color=shade,
                opacity=0.7,
                sizemin=3,
                sizemode='area',
                sizeref=size_reference,
            ),
            text=labels[members],
            hoverinfo='text',
            name=f'Cluster {cluster_id}',
            showlegend=False,
        )
        figure.add_trace(map_trace, row=1, col=1)

    # Legend traces: no data points, only a coloured legend entry each.
    for cluster_id, shade in zip(cluster_ids, palette):
        legend_entry = go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(size=10, color=shade),
            showlegend=True,
            name=f'Cluster {cluster_id}',
        )
        figure.add_trace(legend_entry, row=1, col=2)

    legend_style = dict(
        itemsizing='constant',
        title='Clusters',
        bgcolor='rgba(255,255,255,0.6)',
        bordercolor='rgba(0,0,0,0.5)',
        borderwidth=1,
    )
    figure.update_layout(
        mapbox=map_view,
        showlegend=True,
        legend=legend_style,
        width=1200,
        height=800,
        hovermode='closest',
    )

    # Hide the axes of the legend-only panel.
    figure.update_xaxes(visible=False, row=1, col=2)
    figure.update_yaxes(visible=False, row=1, col=2)

    figure.show()

    lowest_label = np.min(labels)
    print(f"Clusters range from {lowest_label} to {highest_label}")
# Render the interactive cluster map. `clustered_df` is not defined in this
# cell (presumably created earlier in the notebook -- verify); it must provide
# the 'latitude', 'longitude' and 'clusters' columns that plot_clusters_on_map
# reads. Non pandas-on-Spark inputs are wrapped with ps.DataFrame by the function.
plot_clusters_on_map(clustered_df)
print("Interactive cluster map has been displayed.")
Clusters range from 0 to 68 Interactive cluster map has been displayed.